Mercurial > repos > bgruening > flexynesis
diff flexynesis.xml @ 1:b353dad17ab7 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 973836fb40ecb9c0ac26f675d12b20fc8e5f51f4
author | bgruening |
---|---|
date | Mon, 14 Apr 2025 09:56:16 +0000 |
parents | 98431bd19f18 |
children |
line wrap: on
line diff
--- a/flexynesis.xml Mon Aug 12 17:58:02 2024 +0000 +++ b/flexynesis.xml Mon Apr 14 09:56:16 2025 +0000 @@ -71,6 +71,7 @@ #end if --fusion_type $fusion_type --hpo_iter $hpo_iter + --val_size $val_size --finetuning_samples $finetuning_samples --variance_threshold $variance_threshold --correlation_threshold $correlation_threshold @@ -84,6 +85,7 @@ $use_loss_weighting $use_cv $evaluate_baseline_performance + --feature_importance_method $feature_importance_method $disable_marker_finding \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS} ]]></command> @@ -110,6 +112,7 @@ <option value="RandomForest">RandomForest</option> <option value="SVM">SVM</option> <option value="RandomSurvivalForest">RandomSurvivalForest</option> + <option value="XGBoost">XGBoost</option> </param> <when value="DirectPred"/> <when value="GNN"> @@ -133,6 +136,7 @@ <when value="RandomForest"/> <when value="SVM"/> <when value="RandomSurvivalForest"/> + <when value="XGBoost"/> </conditional> <param argument="--target_variables" type="text" label="Target variables" help="Which variables in 'clin.csv' to use for predictions, comma-separated if multiple."> <sanitizer invalid_char=""> @@ -189,22 +193,26 @@ <tests> <test> <param name="non_commercial_use" value="True"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> - <param name="assay_main" value="bar"/> - <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> - <param name="assay" value="foo"/> - </repeat> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="model_class" value="DirectPred"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="foo"/> + </repeat> + <conditional name="model_class"> + <param name="model_class" value="DirectPred"/> + </conditional> <param name="target_variables" value="Erlotinib"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> </conditional> - <param name="hpo_iter" value="1"/> <output_collection name="results" type="list"> <element name="job.embeddings_test"> <assert_contents> @@ -216,10 +224,18 @@ <has_n_lines n="50"/> </assert_contents> </element> - <element name="job.feature_importance"> + <element name="job.feature_importance.GradientShap"> <assert_contents> <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="GradientShap"/> + </assert_contents> + </element> + <element name="job.feature_importance.IntegratedGradients"> + <assert_contents> + <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> + <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> <element name="job.feature_logs.bar"> @@ -249,17 +265,21 @@ </test> <test> <param name="non_commercial_use" value="True"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> - <param name="assay_main" value="bar"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="model_class" value="DirectPred"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <conditional name="model_class"> + <param name="model_class" value="DirectPred"/> + </conditional> <param name="target_variables" value="Erlotinib"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> </conditional> - <param name="hpo_iter" value="1"/> <output_collection name="results" type="list"> <element name="job.embeddings_test"> <assert_contents> @@ -271,10 +291,18 @@ <has_n_lines n="50"/> </assert_contents> </element> - <element name="job.feature_importance"> + <element name="job.feature_importance.GradientShap"> <assert_contents> <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="GradientShap"/> + </assert_contents> + </element> + <element name="job.feature_importance.IntegratedGradients"> + <assert_contents> + <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> + <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> <element name="job.feature_logs.bar"> @@ -299,22 +327,26 @@ </test> <test> <param name="non_commercial_use" value="True"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> - <param name="assay_main" value="bar"/> - <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> - <param name="assay" value="foo"/> - </repeat> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="model_class" value="DirectPred"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="foo"/> + </repeat> + <conditional name="model_class"> + <param name="model_class" value="DirectPred"/> + </conditional> <param name="target_variables" value="Irinotecan"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> </conditional> - <param name="hpo_iter" value="1"/> <output_collection name="results" type="list"> <element name="job.embeddings_test"> <assert_contents> @@ -326,10 +358,18 @@ <has_n_lines n="50"/> </assert_contents> </element> - <element name="job.feature_importance"> + <element name="job.feature_importance.GradientShap"> <assert_contents> <has_text_matching expression="Irinotecan,0,,bar,A2M,"/> <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/> + <has_text_matching expression="GradientShap"/> + </assert_contents> + </element> + <element name="job.feature_importance.IntegratedGradients"> + <assert_contents> + <has_text_matching expression="Irinotecan,0,,bar,A2M,"/> + <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/> + <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> <element name="job.feature_logs.bar"> @@ -337,7 +377,7 @@ <has_n_lines n="25"/> </assert_contents> </element> - <element name="job.feature_logs.bar"> + <element name="job.feature_logs.omics_foo"> <assert_contents> <has_n_lines n="25"/> </assert_contents> @@ -359,21 +399,23 @@ </test> <test> <param name="non_commercial_use" value="True"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> - <param name="assay_main" value="bar"/> - <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> - <param name="assay" value="foo"/> - </repeat> <conditional name="training_type"> <param name="model" value="us_train"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="foo"/> + </repeat> <param name="model_class" value="supervised_vae"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> </conditional> - <param name="hpo_iter" value="1"/> <output_collection name="results" type="list"> <element name="job.embeddings_test"> <assert_contents> @@ -399,23 +441,25 @@ </test> <test> <param name="non_commercial_use" value="True"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> - <param name="assay_main" value="bar"/> - <param name="layer_main" value="input"/> - <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> - <param name="assay" value="foo"/> - <param name="layer" value="output"/> - </repeat> <conditional name="training_type"> <param name="model" value="cm_train"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <param name="layer_main" value="input"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="foo"/> + <param name="layer" value="output"/> + </repeat> <param name="model_class" value="CrossModalPred"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> </conditional> - <param name="hpo_iter" value="1"/> <output_collection name="results" type="list"> <element name="job.embeddings_test"> <assert_contents> @@ -451,25 +495,29 @@ </test> <test> <param name="non_commercial_use" value="True"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> - <param name="assay_main" value="bar"/> - <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> - <param name="assay" value="foo"/> - </repeat> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="model_class" value="GNN"/> - <param name="gnn_conv_type" value="GC"/> - <param name="string_organism" value="9606"/> - <param name="string_node_name" value="gene_name"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="foo"/> + </repeat> + <conditional name="model_class"> + <param name="model_class" value="GNN"/> + <param name="gnn_conv_type" value="GC"/> + <param name="string_organism" value="9606"/> + <param name="string_node_name" value="gene_name"/> + </conditional> <param name="target_variables" value="Erlotinib"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> </conditional> - <param name="hpo_iter" value="1"/> <output_collection name="results" type="list"> <element name="job.embeddings_test"> <assert_contents> @@ -481,7 +529,155 @@ <has_n_lines n="50"/> </assert_contents> </element> - <element name="job.feature_importance"> + <element name="job.feature_importance.GradientShap"> + <assert_contents> + <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> + <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="GradientShap"/> + </assert_contents> + </element> + <element name="job.feature_importance.IntegratedGradients"> + <assert_contents> + <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> + <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="IntegratedGradients"/> + </assert_contents> + </element> + <element name="job.feature_logs.bar"> + <assert_contents> + <has_n_lines n="25"/> + </assert_contents> + </element> + <element name="job.feature_logs.omics_foo"> + <assert_contents> + <has_n_lines n="25"/> + </assert_contents> + </element> + <element name="job.predicted_labels"> + <assert_contents> + <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> + <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> + </assert_contents> + </element> + <element name="job.stats"> + <assert_contents> + <has_text_matching expression="GNN,Erlotinib,numerical,mse,"/> + <has_text_matching expression="GNN,Erlotinib,numerical,r2,"/> + <has_text_matching expression="GNN,Erlotinib,numerical,pearson_corr,"/> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <param name="non_commercial_use" value="True"/> + <conditional name="training_type"> + <param name="model" value="us_train"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="b ar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="f oo"/> + </repeat> + <param name="model_class" value="supervised_vae"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> + </conditional> + <output_collection name="results" type="list"> + <element name="job.embeddings_test"> + <assert_contents> + <has_n_lines n="50"/> + </assert_contents> + </element> + <element name="job.embeddings_train"> + <assert_contents> + <has_n_lines n="50"/> + </assert_contents> + </element> + <element name="job.feature_logs.b_ar"> + <assert_contents> + <has_n_lines n="25"/> + </assert_contents> + </element> + <element name="job.feature_logs.omics_f_oo"> + <assert_contents> + <has_n_lines n="25"/> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <param name="non_commercial_use" value="True"/> + <conditional name="training_type"> + <param name="model" value="s_train"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="foo"/> + </repeat> + <conditional name="model_class"> + <param name="model_class" value="XGBoost"/> + </conditional> + <param name="target_variables" value="Erlotinib"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> + </conditional> + <output_collection name="results" type="list"> + <element name="job.stats"> + <assert_contents> + <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,mse,"/> + <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,r2,"/> + <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,pearson_corr,"/> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <param name="non_commercial_use" value="True"/> + <conditional name="training_type"> + <param name="model" value="s_train"/> + <param name="train_clin" value="train/clin" ftype="csv"/> + <param name="test_clin" value="test/clin" ftype="csv"/> + <param name="train_omics_main" value="train/gex" ftype="csv"/> + <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="assay_main" value="bar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="csv"/> + <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="assay" value="foo"/> + </repeat> + <conditional name="model_class"> + <param name="model_class" value="DirectPred"/> + </conditional> + <param name="target_variables" value="Erlotinib"/> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + <param name="feature_importance_method" value="IntegratedGradients"/> + <param name="val_size" value="0.2"/> + </section> + </conditional> + <output_collection name="results" type="list"> + <element name="job.embeddings_test"> + <assert_contents> + <has_n_lines n="50"/> + </assert_contents> + </element> + <element name="job.embeddings_train"> + <assert_contents> + <has_n_lines n="50"/> + </assert_contents> + </element> + <element name="job.feature_importance.IntegratedGradients"> <assert_contents> <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> @@ -512,46 +708,6 @@ </element> </output_collection> </test> - <test> - <param name="non_commercial_use" value="True"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> - <param name="assay_main" value="b ar"/> - <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> - <param name="assay" value="f oo"/> - </repeat> - <conditional name="training_type"> - <param name="model" value="us_train"/> - <param name="model_class" value="supervised_vae"/> - </conditional> - <param name="hpo_iter" value="1"/> - <output_collection name="results" type="list"> - <element name="job.embeddings_test"> - <assert_contents> - <has_n_lines n="50"/> - </assert_contents> - </element> - <element name="job.embeddings_train"> - <assert_contents> - <has_n_lines n="50"/> - </assert_contents> - </element> - <element name="job.feature_logs.b_ar"> - <assert_contents> - <has_n_lines n="25"/> - </assert_contents> - </element> - <element name="job.feature_logs.omics_f_oo"> - <assert_contents> - <has_n_lines n="25"/> - </assert_contents> - </element> - </output_collection> - </test> </tests> <help> .. class:: warningmark