diff flexynesis.xml @ 1:b353dad17ab7 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 973836fb40ecb9c0ac26f675d12b20fc8e5f51f4
author bgruening
date Mon, 14 Apr 2025 09:56:16 +0000
parents 98431bd19f18
children
line wrap: on
line diff
--- a/flexynesis.xml	Mon Aug 12 17:58:02 2024 +0000
+++ b/flexynesis.xml	Mon Apr 14 09:56:16 2025 +0000
@@ -71,6 +71,7 @@
                     #end if
                     --fusion_type $fusion_type
                     --hpo_iter $hpo_iter
+                    --val_size $val_size
                     --finetuning_samples $finetuning_samples
                     --variance_threshold $variance_threshold
                     --correlation_threshold $correlation_threshold
@@ -84,6 +85,7 @@
                     $use_loss_weighting
                     $use_cv
                     $evaluate_baseline_performance
+                    --feature_importance_method $feature_importance_method
                     $disable_marker_finding
                     \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS}
     ]]></command>
@@ -110,6 +112,7 @@
                         <option value="RandomForest">RandomForest</option>
                         <option value="SVM">SVM</option>
                         <option value="RandomSurvivalForest">RandomSurvivalForest</option>
+                        <option value="XGBoost">XGBoost</option>
                     </param>
                     <when value="DirectPred"/>
                     <when value="GNN">
@@ -133,6 +136,7 @@
                     <when value="RandomForest"/>
                     <when value="SVM"/>
                     <when value="RandomSurvivalForest"/>
+                    <when value="XGBoost"/>
                 </conditional>
                 <param argument="--target_variables" type="text" label="Target variables" help="Which variables in 'clin.csv' to use for predictions, comma-separated if multiple.">
                     <sanitizer invalid_char="">
@@ -189,22 +193,26 @@
     <tests>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="DirectPred"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
                 <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -216,10 +224,18 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
                     <assert_contents>
                         <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
                         <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_logs.bar">
@@ -249,17 +265,21 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="DirectPred"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
                 <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -271,10 +291,18 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
                     <assert_contents>
                         <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
                         <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_logs.bar">
@@ -299,22 +327,26 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="DirectPred"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
                 <param name="target_variables" value="Irinotecan"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -326,10 +358,18 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
                     <assert_contents>
                         <has_text_matching expression="Irinotecan,0,,bar,A2M,"/>
                         <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Irinotecan,0,,bar,A2M,"/>
+                        <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_logs.bar">
@@ -337,7 +377,7 @@
                         <has_n_lines n="25"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_logs.bar">
+                <element name="job.feature_logs.omics_foo">
                     <assert_contents>
                         <has_n_lines n="25"/>
                     </assert_contents>
@@ -359,21 +399,23 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="us_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
                 <param name="model_class" value="supervised_vae"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -399,23 +441,25 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <param name="layer_main" value="input"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-                <param name="layer" value="output"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="cm_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <param name="layer_main" value="input"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                    <param name="layer" value="output"/>
+                </repeat>
                 <param name="model_class" value="CrossModalPred"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -451,25 +495,29 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="GNN"/>
-                <param name="gnn_conv_type" value="GC"/>
-                <param name="string_organism" value="9606"/>
-                <param name="string_node_name" value="gene_name"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="GNN"/>
+                    <param name="gnn_conv_type" value="GC"/>
+                    <param name="string_organism" value="9606"/>
+                    <param name="string_node_name" value="gene_name"/>
+                </conditional>
                 <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -481,7 +529,155 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.bar">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.omics_foo">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.predicted_labels">
+                    <assert_contents>
+                        <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
+                        <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
+                    </assert_contents>
+                </element>
+                <element name="job.stats">
+                    <assert_contents>
+                        <has_text_matching expression="GNN,Erlotinib,numerical,mse,"/>
+                        <has_text_matching expression="GNN,Erlotinib,numerical,r2,"/>
+                        <has_text_matching expression="GNN,Erlotinib,numerical,pearson_corr,"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="non_commercial_use" value="True"/>
+            <conditional name="training_type">
+                <param name="model" value="us_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="b ar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="f oo"/>
+                </repeat>
+                <param name="model_class" value="supervised_vae"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
+            </conditional>
+            <output_collection name="results" type="list">
+                <element name="job.embeddings_test">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.embeddings_train">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.b_ar">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.omics_f_oo">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="non_commercial_use" value="True"/>
+            <conditional name="training_type">
+                <param name="model" value="s_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="XGBoost"/>
+                </conditional>
+                <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
+            </conditional>
+            <output_collection name="results" type="list">
+                <element name="job.stats">
+                    <assert_contents>
+                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,mse,"/>
+                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,r2,"/>
+                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,pearson_corr,"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="non_commercial_use" value="True"/>
+            <conditional name="training_type">
+                <param name="model" value="s_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
+                <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                    <param name="feature_importance_method" value="IntegratedGradients"/>
+                    <param name="val_size" value="0.2"/>
+                </section>
+            </conditional>
+            <output_collection name="results" type="list">
+                <element name="job.embeddings_test">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.embeddings_train">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
                         <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
                         <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
@@ -512,46 +708,6 @@
                 </element>
             </output_collection>
         </test>
-        <test>
-            <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="b ar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="f oo"/>
-            </repeat>
-            <conditional name="training_type">
-                <param name="model" value="us_train"/>
-                <param name="model_class" value="supervised_vae"/>
-            </conditional>
-            <param name="hpo_iter" value="1"/>
-            <output_collection name="results" type="list">
-                <element name="job.embeddings_test">
-                    <assert_contents>
-                        <has_n_lines n="50"/>
-                    </assert_contents>
-                </element>
-                <element name="job.embeddings_train">
-                    <assert_contents>
-                        <has_n_lines n="50"/>
-                    </assert_contents>
-                </element>
-                <element name="job.feature_logs.b_ar">
-                    <assert_contents>
-                        <has_n_lines n="25"/>
-                    </assert_contents>
-                </element>
-                <element name="job.feature_logs.omics_f_oo">
-                    <assert_contents>
-                        <has_n_lines n="25"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-        </test>
     </tests>
     <help>
 .. class:: warningmark