view keras_batch_models.xml @ 17:08228c7fcf48 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 80417bf0158a9b596e485dd66408f738f405145a
author bgruening
date Mon, 02 Oct 2023 08:18:30 +0000
parents 70846a2dd227
children
line wrap: on
line source

<tool id="keras_batch_models" name="Build Deep learning Batch Training Models" version="@VERSION@" profile="@PROFILE@">
  <!-- Short tagline for this tool. -->
  <description>with online data generator for Genomic/Protein sequences and images</description>
  <!-- Macro files imported by this tool. The <expand> macros and the @VERSION@ / @PROFILE@
       tokens used in this file are not defined here; presumably they come from these two
       files (TODO confirm which file defines which). -->
  <macros>
    <import>main_macros.xml</import>
    <import>keras_macros.xml</import>
  </macros>
  <!-- Both expansions below are defined outside this file; judging by their names they
       supply the dependency requirements and the stdio/error handling rules. -->
  <expand macro="python_requirements" />
  <expand macro="macro_stdio" />
  <!-- Reports the tool version by echoing the @VERSION@ token. -->
  <version_command>echo "@VERSION@"</version_command>
  <!-- Runs keras_deep_learning.py from the tool directory. It receives, in order:
       the "inputs" configfile declared in <configfiles>, the model JSON dataset selected
       under the mode_selection conditional, the fixed tool id string 'keras_batch_models',
       and the path of the output dataset "outfile". -->
  <command>
    <![CDATA[
    python '$__tool_directory__/keras_deep_learning.py'
           --inputs '$inputs'
           --model_json '$mode_selection.infile_json'
           --tool_id 'keras_batch_models'
           --outfile '$outfile'
    ]]>
  </command>
  <!-- Declares the config file that the command references as $inputs. Per the Galaxy
       tool schema, an <inputs> configfile holds the submitted parameter values as JSON
       (NOTE(review): confirm against the Galaxy version targeted by @PROFILE@). -->
  <configfiles>
    <inputs name="inputs" />
  </configfiles>
  <!-- Tool form. A single building mode ("train_model") collects: the model configuration
       JSON, the learning class, a batch data generator with its generator-specific
       parameters, the compile and fit parameter sections (expanded from macros), and three
       optional scalar settings. -->
  <inputs>
    <conditional name="mode_selection">
      <param name="mode_type" type="select" label="Choose a building mode">
        <option value="train_model" selected="true">Build a training model</option>
      </param>
      <when value="train_model">
        <param name="infile_json" type="data" format="json" label="Select the dataset containing model configurations (JSON)" />
        <param name="learning_type" type="select" label="Select a learning class">
          <option value="KerasGBatchClassifier">KerasGBatchClassifier -- Build a training model with batch data generator</option>
        </param>
        <!-- Each generator choice expands its own parameter macro; "none" adds no parameters. -->
        <conditional name="generator_selection">
          <param name="generator_type" type="select" label="Select a batch data generator">
            <option value="FastaDNABatchGenerator" selected="true">FastaDNABatchGenerator -- Online transformation of DNA sequences</option>
            <option value="FastaRNABatchGenerator">FastaRNABatchGenerator -- Online transformation of RNA sequences</option>
            <option value="FastaProteinBatchGenerator">FastaProteinBatchGenerator -- Online transformation of Protein sequences</option>
            <option value="GenomicIntervalBatchGenerator">GenomicIntervalBatchGenerator -- Online transformation of genomic sequences from a reference genome and intervals</option>
            <option value="none">To be determined</option>
          </param>
          <when value="FastaDNABatchGenerator">
            <expand macro="params_fasta_dna_batch_generator" />
          </when>
          <!-- NOTE(review): the RNA branch reuses the DNA parameter macro; presumably both
               generators take the same parameters. Confirm in the imported macro files. -->
          <when value="FastaRNABatchGenerator">
            <expand macro="params_fasta_dna_batch_generator" />
          </when>
          <when value="FastaProteinBatchGenerator">
            <expand macro="params_fasta_protein_batch_generator" />
          </when>
          <when value="GenomicIntervalBatchGenerator">
            <expand macro="params_genomic_interval_batch_generator" />
          </when>
          <when value="none" />
        </conditional>
        <!-- Compile and fit sections are defined in the imported macro files. -->
        <expand macro="keras_compile_params_section" />
        <expand macro="keras_fit_params_section" />
        <!-- Optional scalar settings; the two integer parameters default to empty. -->
        <param name="class_positive_factor" type="float" value="1" optional="true" label="class_positive_factor" help="For binary classification only. If int, like 5, will convert to class_weight {0: 1, 1: 5}. If float, 0.2, corresponds to class_weight {0: 1/0.2, 1: 1}" />
        <param name="prediction_steps" type="integer" value="" optional="true" label="prediction_steps" help="Prediction steps. Optional. If None, it equals number of samples divided by `batch_size`." />
        <param name="random_seed" type="integer" value="" optional="true" label="Random Seed" help="Integer or blank for None. Warning: when random seed is set to an integer, training will be running in single thread mode, which may cause slowness." />
      </when>
    </conditional>
  </inputs>
  <!-- Single output dataset "outfile" (format h5mlm); its path is what the command passes
       to the script as the output file. The label previously contained a stray double
       space before "on". -->
  <outputs>
    <data format="h5mlm" name="outfile" label="Keras Batch Classifier on ${on_string}" />
  </outputs>
  <tests>
    <!-- Test 1: keras05.json with no batch generator ("none"). Sets explicit compile
         parameters (categorical_crossentropy loss, RMSprop with lr and decay, two metrics),
         100 epochs and callback type "None". The output is compared by size (sim_size,
         delta 5) against keras_batch_model05. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="keras05.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="none" />
        </conditional>
        <section name="compile_params">
          <param name="loss" value="categorical_crossentropy" />
          <conditional name="optimizer_selection">
            <param name="optimizer_type" value="RMSprop" />
            <section name="optimizer_options">
              <param name="lr" value="0.0001" />
              <param name="decay" value="0.000001" />
            </section>
          </conditional>
          <param name="metrics" value="acc,categorical_accuracy" />
        </section>
        <section name="fit_params">
          <param name="epochs" value="100" />
          <repeat name="callbacks">
            <conditional name="callback_selection">
              <param name="callback_type" value="None" />
            </conditional>
          </repeat>
        </section>
      </conditional>
      <output name="outfile" file="keras_batch_model05" compare="sim_size" delta="5" />
    </test>
    <!-- Test 2: keras01.json with no batch generator ("none"). Compile parameters are not
         set here, so the form defaults apply. Uses 100 epochs and callback type "None".
         The output is compared by size (sim_size, delta 20) against keras_batch_model04. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="keras01.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="none" />
        </conditional>
        <section name="fit_params">
          <param name="epochs" value="100" />
          <repeat name="callbacks">
            <conditional name="callback_selection">
              <param name="callback_type" value="None" />
            </conditional>
          </repeat>
        </section>
      </conditional>
      <output name="outfile" file="keras_batch_model04" compare="sim_size" delta="20" />
    </test>
    <!-- Test 3: deepsear_1feature.json with GenomicIntervalBatchGenerator and generator
         seed 999. Uses 100 epochs; no callbacks entry is given. The output is compared by
         size (sim_size, delta 20) against keras_batch_model03. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="deepsear_1feature.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="GenomicIntervalBatchGenerator" />
          <param name="seed" value="999" />
        </conditional>
        <section name="fit_params">
          <param name="epochs" value="100" />
        </section>
      </conditional>
      <output name="outfile" file="keras_batch_model03" compare="sim_size" delta="20" />
    </test>
    <!-- Test 4: keras01.json with FastaDNABatchGenerator and generator seed 999. Uses 100
         epochs and an EarlyStopping callback that monitors val_loss. The output is compared
         by size (sim_size, delta 20) against keras_batch_model02. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="keras01.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="FastaDNABatchGenerator" />
          <param name="seed" value="999" />
        </conditional>
        <section name="fit_params">
          <param name="epochs" value="100" />
          <repeat name="callbacks">
            <conditional name="callback_selection">
              <param name="callback_type" value="EarlyStopping" />
              <param name="monitor" value="val_loss" />
            </conditional>
          </repeat>
        </section>
      </conditional>
      <output name="outfile" file="keras_batch_model02" compare="sim_size" delta="20" />
    </test>
    <!-- Test 5: keras01.json with FastaDNABatchGenerator and generator seed 999, as in the
         previous test but with callback type "None". The output is compared by size
         (sim_size, delta 5) against keras_batch_model01. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="keras01.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="FastaDNABatchGenerator" />
          <param name="seed" value="999" />
        </conditional>
        <section name="fit_params">
          <param name="epochs" value="100" />
          <repeat name="callbacks">
            <conditional name="callback_selection">
              <param name="callback_type" value="None" />
            </conditional>
          </repeat>
        </section>
      </conditional>
      <output name="outfile" file="keras_batch_model01" compare="sim_size" delta="5" />
    </test>
  </tests>
  <help>
      <![CDATA[
**What does this tool do?**

This tool builds deep learning training models using the `galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`_ API, which takes parameters in five categories:

- a JSON file that contains layer information for a deep learning model.

- a data batch generator that converts raw data, such as images and genomic sequences, into numerical data that can be fitted by the deep learning model. Because the `batch conversion - fitting` cycle runs in stream mode (also called online transformation), training stays CPU- and memory-efficient. Reference: `galaxy_ml.preprocessors.FastaDNABatchGenerator`_, `galaxy_ml.preprocessors.FastaRNABatchGenerator`_, `galaxy_ml.preprocessors.FastaProteinBatchGenerator`_, `galaxy_ml.preprocessors.GenomicIntervalBatchGenerator`_.

- compile parameters, mainly composed of the loss function and the optimizer.

- fit parameters, a group of variables that control the training process, referring to `galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`_ and Keras.

- other parameters, including `class_positive_factor`, `prediction_steps`, `seed` (random seed) and so on.


**Output**

A model file that can be used in the `model_validation` tool or the `hyperparameter search` tool.

.. _`galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#kerasgbatchclassifier

.. _`galaxy_ml.preprocessors.FastaDNABatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaDNABatchGenerator

.. _`galaxy_ml.preprocessors.FastaRNABatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaRNABatchGenerator

.. _`galaxy_ml.preprocessors.FastaProteinBatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaProteinBatchGenerator

.. _`galaxy_ml.preprocessors.GenomicIntervalBatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#GenomicIntervalBatchGenerator

      ]]>
  </help>
  <!-- Citation entries are expanded from macros defined outside this file; by name they
       cite Keras and TensorFlow. -->
  <citations>
    <expand macro="keras_citation" />
    <expand macro="tensorflow_citation" />
  </citations>
</tool>