comparison main_macros.xml @ 29:c156b85a6389 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author bgruening
date Sun, 30 Dec 2018 01:39:27 -0500 (2018-12-30)
parents a62c8c1f2ef7
children 60d80322e1e9
comparison
equal deleted inserted replaced
28:2c6c3af5a3cf 29:c156b85a6389
1 <macros> 1 <macros>
2 <token name="@VERSION@">0.9</token> 2 <token name="@VERSION@">1.0</token>
3 3
4 <xml name="python_requirements"> 4 <xml name="python_requirements">
5 <requirements> 5 <requirements>
6 <requirement type="package" version="3.6">python</requirement> 6 <requirement type="package" version="3.6">python</requirement>
7 <requirement type="package" version="0.19.1">scikit-learn</requirement> 7 <requirement type="package" version="0.20.2">scikit-learn</requirement>
8 <requirement type="package" version="0.22.0">pandas</requirement> 8 <requirement type="package" version="0.23.4">pandas</requirement>
9 <requirement type="package" version="0.72.1">xgboost</requirement> 9 <requirement type="package" version="0.80">xgboost</requirement>
10 <requirement type="package" version="0.9.12">asteval</requirement> 10 <requirement type="package" version="0.9.13">asteval</requirement>
11 <yield /> 11 <yield />
12 </requirements> 12 </requirements>
13 </xml> 13 </xml>
14 14
15 <xml name="macro_stdio"> 15 <xml name="macro_stdio">
242 242
243 <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> 243 <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration">
244 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/> 244 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/>
245 </xml> 245 </xml>
246 246
247 <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results."> 247 <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results. default=None.">
248 <param argument="random_state" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@"/> 248 <param argument="random_state" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@"/>
249 </xml> 249 </xml>
250 250
251 <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution."> 251 <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution.">
252 <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/> 252 <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/>
344 </xml> 344 </xml>
345 345
346 <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1"> 346 <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1">
347 <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:"> 347 <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:">
348 <option value="by_index_number" selected="true">Select columns by column index number(s)</option> 348 <option value="by_index_number" selected="true">Select columns by column index number(s)</option>
349 <option value="all_but_by_index_number">All columns BUT by column index number(s)</option>
349 <option value="by_header_name">Select columns by column header name(s)</option> 350 <option value="by_header_name">Select columns by column header name(s)</option>
350 <option value="all_but_by_index_number">All columns but by column index number(s)</option> 351 <option value="all_but_by_header_name">All columns BUT by column header name(s)</option>
351 <option value="all_but_by_header_name">All columns but by column header name(s)</option>
352 <option value="all_columns">All columns</option> 352 <option value="all_columns">All columns</option>
353 </param> 353 </param>
354 <when value="by_index_number"> 354 <when value="by_index_number">
355 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> 355 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
356 </when> 356 </when>
357 <when value="all_but_by_index_number">
358 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
359 </when>
357 <when value="by_header_name"> 360 <when value="by_header_name">
358 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/> 361 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
359 </when>
360 <when value="all_but_by_index_number">
361 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
362 </when> 362 </when>
363 <when value="all_but_by_header_name"> 363 <when value="all_but_by_header_name">
364 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/> 364 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
365 </when> 365 </when>
366 <when value="all_columns"> 366 <when value="all_columns">
541 <expand macro="max_iter"/> 541 <expand macro="max_iter"/>
542 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/> 542 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/>
543 <!--param argument="precompute_distances"/--> 543 <!--param argument="precompute_distances"/-->
544 <expand macro="random_state"/> 544 <expand macro="random_state"/>
545 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> 545 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/>
546 <expand macro="kmeans_algorithm"/>
546 </section> 547 </section>
548 </xml>
549
550 <xml name="kmeans_algorithm">
551 <param argument="algorithm" type="select" label="K-means algorithm to use:">
552 <option value="auto" selected="true">auto</option>
553 <option value="full">full</option>
554 <option value="elkan">elkan</option>
555 </param>
547 </xml> 556 </xml>
548 557
549 <xml name="birch_advanced_options"> 558 <xml name="birch_advanced_options">
550 <section name="options" title="Advanced Options" expanded="False"> 559 <section name="options" title="Advanced Options" expanded="False">
551 <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/> 560 <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/>
728 <option value="median">Replace missing values using the median along the axis</option> 737 <option value="median">Replace missing values using the median along the axis</option>
729 <option value="most_frequent">Replace missing using the most frequent value along the axis</option> 738 <option value="most_frequent">Replace missing using the most frequent value along the axis</option>
730 </param> 739 </param>
731 <param argument="missing_values" type="text" optional="true" value="NaN" 740 <param argument="missing_values" type="text" optional="true" value="NaN"
732 label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/> 741 label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
733 <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" 742 <!--param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
734 label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/> 743 label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/> -->
735 <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" "> 744 <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" ">
736 <option value="0" selected="true">Impute along columns</option> 745 <option value="0" selected="true">Impute along columns</option>
737 <option value="1">Impute along rows</option> 746 <option value="1">Impute along rows</option>
738 </param--> 747 </param-->
739 </section> 748 </section>
800 </section> 809 </section>
801 </when> 810 </when>
802 </expand> 811 </expand>
803 </xml> 812 </xml>
804 813
805 <xml name="fs_selectfrommodel_prefitted"> 814 <xml name="cv_splitter">
806 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" > 815 <option value="default" selected="true">default splitter</option>
807 <option value="new" selected="true">Yes</option> 816 <option value="KFold">KFold</option>
808 <option value="prefitted">No. Load a prefitted estimator</option> 817 <option value="StratifiedKFold">StratifiedKFold</option>
809 </param> 818 <option value="LeaveOneOut">LeaveOneOut</option>
810 <when value="new"> 819 <option value="LeavePOut">LeavePOut</option>
811 <expand macro="estimator_selector_all"/> 820 <option value="RepeatedKFold">RepeatedKFold</option>
812 </when> 821 <option value="RepeatedStratifiedKFold">RepeatedStratifiedKFold</option>
813 <when value="prefitted"> 822 <option value="ShuffleSplit">ShuffleSplit</option>
814 <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" /> 823 <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option>
815 </when> 824 <option value="TimeSeriesSplit">TimeSeriesSplit</option>
816 </xml> 825 <option value="PredefinedSplit">PredefinedSplit</option>
817 826 <yield/>
818 <xml name="fs_selectfrommodel_no_prefitted"> 827 </xml>
819 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" > 828
820 <option value="new" selected="true">Yes</option> 829 <xml name="cv_splitter_options">
821 </param> 830 <when value="default">
822 <when value="new"> 831 <expand macro="cv_n_splits"/>
823 <expand macro="estimator_selector_all"/> 832 </when>
824 </when> 833 <when value="KFold">
834 <expand macro="cv_n_splits"/>
835 <expand macro="cv_shuffle"/>
836 <expand macro="random_state"/>
837 </when>
838 <when value="StratifiedKFold">
839 <expand macro="cv_n_splits"/>
840 <expand macro="cv_shuffle"/>
841 <expand macro="random_state"/>
842 </when>
843 <when value="LeaveOneOut">
844 </when>
845 <when value="LeavePOut">
846 <param argument="p" type="integer" value="" label="p" help="Integer. Size of the test sets."/>
847 </when>
848 <when value="RepeatedKFold">
849 <expand macro="cv_n_splits" value="5"/>
850 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
851 <expand macro="random_state" />
852 </when>
853 <when value="RepeatedStratifiedKFold">
854 <expand macro="cv_n_splits" value="5"/>
855 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
856 <expand macro="random_state" />
857 </when>
858 <when value="ShuffleSplit">
859 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/>
860 <expand macro="cv_test_size" value="0.1" />
861 <expand macro="random_state"/>
862 </when>
863 <when value="StratifiedShuffleSplit">
864 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/>
865 <expand macro="cv_test_size" value="0.1" />
866 <expand macro="random_state"/>
867 </when>
868 <when value="TimeSeriesSplit">
869 <expand macro="cv_n_splits"/>
870 <param argument="max_train_size" type="integer" value="" optional="true" label="Maximum size of the training set" help="Maximum size for a single training set." />
871 </when>
872 <when value="PredefinedSplit">
873 <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'."/>
874 </when>
875 <yield/>
825 </xml> 876 </xml>
826 877
827 <xml name="cv"> 878 <xml name="cv">
828 <param argument="cv" type="text" value="" optional="true" label="cv" help="Optional. Integer or evalable splitter object, e.g., StratifiedKFold(n_splits=3, shuffle=True, random_state=10). Leave blank for default." > 879 <conditional name="cv_selector">
829 <sanitizer> 880 <param name="selected_cv" type="select" label="Select the cv splitter:">
830 <valid initial="default"> 881 <expand macro="cv_splitter">
831 <add value="&apos;"/> 882 <option value="GroupKFold">GroupKFold</option>
832 </valid> 883 <option value="GroupShuffleSplit">GroupShuffleSplit</option>
833 </sanitizer> 884 <option value="LeaveOneGroupOut">LeaveOneGroupOut</option>
834 </param> 885 <option value="LeavePGroupsOut">LeavePGroupsOut</option>
835 </xml> 886 </expand>
836 887 </param>
837 <xml name="feature_selection_all"> 888 <expand macro="cv_splitter_options">
889 <when value="GroupKFold">
890 <expand macro="cv_n_splits"/>
891 <expand macro="cv_groups" />
892 </when>
893 <when value="GroupShuffleSplit">
894 <expand macro="cv_n_splits" value="5"/>
895 <expand macro="cv_test_size"/>
896 <expand macro="random_state"/>
897 <expand macro="cv_groups"/>
898 </when>
899 <when value="LeaveOneGroupOut">
900 <expand macro="cv_groups"/>
901 </when>
902 <when value="LeavePGroupsOut">
903 <param argument="n_groups" type="integer" value="" label="n_groups" help="Number of groups (p) to leave out in the test split." />
904 <expand macro="cv_groups"/>
905 </when>
906 </expand>
907 </conditional>
908 </xml>
909
910 <xml name="cv_reduced">
911 <conditional name="cv_selector">
912 <param name="selected_cv" type="select" label="Select the cv splitter:">
913 <expand macro="cv_splitter"/>
914 </param>
915 <expand macro="cv_splitter_options"/>
916 </conditional>
917 </xml>
918
919 <xml name="cv_n_splits" token_value="3" token_help="Number of folds. Must be at least 2.">
920 <param argument="n_splits" type="integer" value="@VALUE@" min="2" label="n_splits" help="@HELP@"/>
921 </xml>
922
923 <xml name="cv_shuffle">
924 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to shuffle data before splitting" />
925 </xml>
926
927 <xml name="cv_test_size" token_value="0.2">
928 <param argument="test_size" type="float" value="@VALUE@" min="0.0" label="Portion or number of the test set" help="0.0-1.0, proportion of the dataset to include in the test split; >1, integer only, the absolute number of test samples "/>
929 </xml>
930
931 <xml name="cv_groups" >
932 <param argument="groups" type="text" value="" area="true" label="Groups" help="Group lables in a list. e.g., [1, 1, 2, 2, 3, 3, 3]"/>
933 </xml>
934
935 <xml name="feature_selection_algorithms">
936 <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option>
937 <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
938 <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
939 <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
940 <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
941 <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
942 <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
943 <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
944 <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
945 <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
946 </xml>
947
948 <xml name="feature_selection_algorithm_details">
949 <when value="GenericUnivariateSelect">
950 <expand macro="feature_selection_score_function" />
951 <section name="options" title="Advanced Options" expanded="False">
952 <param argument="mode" type="select" label="Feature selection mode">
953 <option value="percentile">percentile</option>
954 <option value="k_best">k_best</option>
955 <option value="fpr">fpr</option>
956 <option value="fdr">fdr</option>
957 <option value="fwe">fwe</option>
958 </param>
959 <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
960 </section>
961 </when>
962 <when value="SelectPercentile">
963 <expand macro="feature_selection_score_function" />
964 <section name="options" title="Advanced Options" expanded="False">
965 <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
966 </section>
967 </when>
968 <when value="SelectKBest">
969 <expand macro="feature_selection_score_function" />
970 <section name="options" title="Advanced Options" expanded="False">
971 <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
972 </section>
973 </when>
974 <when value="SelectFpr">
975 <expand macro="feature_selection_score_function" />
976 <section name="options" title="Advanced Options" expanded="False">
977 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/>
978 </section>
979 </when>
980 <when value="SelectFdr">
981 <expand macro="feature_selection_score_function" />
982 <section name="options" title="Advanced Options" expanded="False">
983 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
984 </section>
985 </when>
986 <when value="SelectFwe">
987 <expand macro="feature_selection_score_function" />
988 <section name="options" title="Advanced Options" expanded="False">
989 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
990 </section>
991 </when>
992 <when value="VarianceThreshold">
993 <section name="options" title="Options" expanded="False">
994 <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
995 </section>
996 </when>
997 </xml>
998
999 <xml name="feature_selection_SelectFromModel">
1000 <when value="SelectFromModel">
1001 <conditional name="model_inputter">
1002 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
1003 <option value="new" selected="true">Yes</option>
1004 <option value="prefitted">No. Load a prefitted estimator</option>
1005 </param>
1006 <when value="new">
1007 <expand macro="estimator_selector_fs"/>
1008 </when>
1009 <when value="prefitted">
1010 <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" />
1011 </when>
1012 </conditional>
1013 <expand macro="feature_selection_SelectFromModel_options"/>
1014 </when>
1015 </xml>
1016
1017 <xml name="feature_selection_SelectFromModel_no_prefitted">
1018 <when value="SelectFromModel">
1019 <conditional name="model_inputter">
1020 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
1021 <option value="new" selected="true">Yes</option>
1022 </param>
1023 <when value="new">
1024 <expand macro="estimator_selector_all"/>
1025 </when>
1026 </conditional>
1027 <expand macro="feature_selection_SelectFromModel_options"/>
1028 </when>
1029 </xml>
1030
1031 <xml name="feature_selection_SelectFromModel_options">
1032 <section name="options" title="Advanced Options" expanded="False">
1033 <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
1034 <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
1035 <param argument="max_features" type="integer" value="" optional="true" label="The maximum number of features selected scoring above threshold" help="To disable threshold and only select based on max_features, set threshold=-np.inf."/>
1036 </section>
1037 </xml>
1038
1039 <xml name="feature_selection_RFE">
1040 <when value="RFE">
1041 <yield/>
1042 <section name="options" title="Advanced Options" expanded="False">
1043 <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
1044 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
1045 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
1046 </section>
1047 </when>
1048 </xml>
1049
1050 <xml name="feature_selection_RFECV">
1051 <when value="RFECV">
1052 <yield/>
1053 <section name="options" title="Advanced Options" expanded="False">
1054 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
1055 <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
1056 <expand macro="cv_reduced"/>
1057 <expand macro="scoring_selection"/>
1058 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
1059 </section>
1060 </when>
1061 </xml>
1062
1063 <xml name="feature_selection_pipeline">
1064 <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no customer estimator for RFE and RFECV-->
838 <conditional name="fs_algorithm_selector"> 1065 <conditional name="fs_algorithm_selector">
839 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> 1066 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
840 <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option> 1067 <expand macro="feature_selection_algorithms"/>
841 <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> 1068 </param>
842 <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> 1069 <expand macro="feature_selection_algorithm_details"/>
843 <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option> 1070 <expand macro="feature_selection_SelectFromModel_no_prefitted"/>
844 <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option> 1071 <expand macro="feature_selection_RFE">
845 <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
846 <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
847 <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
848 <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
849 <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
850 </param>
851 <when value="SelectFromModel">
852 <conditional name="model_inputter">
853 <yield/>
854 </conditional>
855 <section name="options" title="Advanced Options" expanded="False">
856 <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
857 <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
858 </section>
859 </when>
860 <when value="GenericUnivariateSelect">
861 <expand macro="feature_selection_score_function" />
862 <section name="options" title="Advanced Options" expanded="False">
863 <param argument="mode" type="select" label="Feature selection mode">
864 <option value="percentile">percentile</option>
865 <option value="k_best">k_best</option>
866 <option value="fpr">fpr</option>
867 <option value="fdr">fdr</option>
868 <option value="fwe">fwe</option>
869 </param>
870 <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
871 </section>
872 </when>
873 <when value="SelectPercentile">
874 <expand macro="feature_selection_score_function" />
875 <section name="options" title="Advanced Options" expanded="False">
876 <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
877 </section>
878 </when>
879 <when value="SelectKBest">
880 <expand macro="feature_selection_score_function" />
881 <section name="options" title="Advanced Options" expanded="False">
882 <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
883 </section>
884 </when>
885 <when value="SelectFpr">
886 <expand macro="feature_selection_score_function" />
887 <section name="options" title="Advanced Options" expanded="False">
888 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/>
889 </section>
890 </when>
891 <when value="SelectFdr">
892 <expand macro="feature_selection_score_function" />
893 <section name="options" title="Advanced Options" expanded="False">
894 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
895 </section>
896 </when>
897 <when value="SelectFwe">
898 <expand macro="feature_selection_score_function" />
899 <section name="options" title="Advanced Options" expanded="False">
900 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
901 </section>
902 </when>
903 <when value="RFE">
904 <expand macro="estimator_selector_all"/> 1072 <expand macro="estimator_selector_all"/>
905 <section name="options" title="Advanced Options" expanded="False"> 1073 </expand>
906 <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." /> 1074 <expand macro="feature_selection_RFECV">
907 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
908 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
909 </section>
910 </when>
911 <when value="RFECV">
912 <expand macro="estimator_selector_all"/> 1075 <expand macro="estimator_selector_all"/>
913 <section name="options" title="Advanced Options" expanded="False"> 1076 </expand>
914 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> 1077 </conditional>
915 <expand macro="cv"/> 1078 </xml>
916 <expand macro="scoring_selection"/> 1079
917 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> 1080 <xml name="feature_selection_fs">
918 </section> 1081 <conditional name="fs_algorithm_selector">
919 </when> 1082 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
920 <when value="VarianceThreshold"> 1083 <expand macro="feature_selection_algorithms"/>
921 <section name="options" title="Options" expanded="False"> 1084 </param>
922 <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> 1085 <expand macro="feature_selection_algorithm_details"/>
923 </section> 1086 <expand macro="feature_selection_SelectFromModel"/>
924 </when> 1087 <expand macro="feature_selection_RFE">
925 <!--when value="chi2"> 1088 <expand macro="estimator_selector_fs"/>
926 </when> 1089 </expand>
927 <when value="f_classif"> 1090 <expand macro="feature_selection_RFECV">
928 </when> 1091 <expand macro="estimator_selector_fs"/>
929 <when value="f_regression"> 1092 </expand>
930 </when>
931 <when value="mutual_info_classif">
932 </when>
933 <when value="mutual_info_regression">
934 </when-->
935 </conditional> 1093 </conditional>
936 </xml> 1094 </xml>
937 1095
938 <xml name="feature_selection_score_function"> 1096 <xml name="feature_selection_score_function">
939 <param argument="score_func" type="select" label="Select a score function"> 1097 <param argument="score_func" type="select" label="Select a score function">
941 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option> 1099 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option>
942 <option value="f_regression">f_regression - Univariate linear regression tests</option> 1100 <option value="f_regression">f_regression - Univariate linear regression tests</option>
943 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option> 1101 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option>
944 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option> 1102 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option>
945 </param> 1103 </param>
946 </xml>
947
948 <xml name="feature_selection_output_mothods">
949 <conditional name="output_method_selector">
950 <param name="selected_method" type="select" label="Select an output method:">
951 <option value="fit_transform">fit_transform - Fit to data, then transform it</option>
952 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option>
953 </param>
954 <when value="fit_transform">
955 <!--**fit_params-->
956 </when>
957 <when value="get_support">
958 <param name="indices" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Indices" help="If True, the return value will be an array of integers, rather than a boolean mask."/>
959 </when>
960 </conditional>
961 </xml> 1104 </xml>
962 1105
963 <xml name="model_validation_common_options"> 1106 <xml name="model_validation_common_options">
964 <expand macro="cv"/> 1107 <expand macro="cv"/>
965 <expand macro="verbose"/> 1108 <expand macro="verbose"/>
1120 <expand macro="scoring_selection"/> 1263 <expand macro="scoring_selection"/>
1121 <expand macro="model_validation_common_options"/> 1264 <expand macro="model_validation_common_options"/>
1122 <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/> 1265 <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/>
1123 <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/> 1266 <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/>
1124 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/> 1267 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/>
1125 <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to 0 if an error occurs in estimator fitting and FitFailedWarning is raised."/> 1268 <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/>
1126 <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/> 1269 <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/>
1127 </xml> 1270 </xml>
1128 1271
1129 <xml name="estimator_selector_all"> 1272 <xml name="estimator_module_options">
1130 <conditional name="estimator_selector"> 1273 <option value="svm" selected="true">sklearn.svm</option>
1131 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > 1274 <option value="linear_model">sklearn.linear_model</option>
1132 <option value="svm" selected="true">sklearn.svm</option> 1275 <option value="ensemble">sklearn.ensemble</option>
1133 <option value="linear_model">sklearn.linear_model</option> 1276 <option value="naive_bayes">sklearn.naive_bayes</option>
1134 <option value="ensemble">sklearn.ensemble</option> 1277 <option value="tree">sklearn.tree</option>
1135 <option value="naive_bayes">sklearn.naive_bayes</option> 1278 <option value="neighbors">sklearn.neighbors</option>
1136 <option value="tree">sklearn.tree</option> 1279 <option value="xgboost">xgboost</option>
1137 <option value="neighbors">sklearn.neighbors</option> 1280 <yield/>
1138 <option value="xgboost">xgboost</option> 1281 </xml>
1139 <!--more--> 1282
1140 </param> 1283 <xml name="estimator_suboptions">
1141 <when value="svm"> 1284 <when value="svm">
1142 <param name="selected_estimator" type="select" label="Choose estimator class:"> 1285 <param name="selected_estimator" type="select" label="Choose estimator class:">
1143 <option value="LinearSVC" selected="true">LinearSVC</option> 1286 <option value="LinearSVC" selected="true">LinearSVC</option>
1144 <option value="LinearSVR">LinearSVR</option> 1287 <option value="LinearSVR">LinearSVR</option>
1145 <option value="NuSVC">NuSVC</option> 1288 <option value="NuSVC">NuSVC</option>
1242 <option value="XGBRegressor" selected="true">XGBRegressor</option> 1385 <option value="XGBRegressor" selected="true">XGBRegressor</option>
1243 <option value="XGBClassifier">XGBClassifier</option> 1386 <option value="XGBClassifier">XGBClassifier</option>
1244 </param> 1387 </param>
1245 <expand macro="estimator_params_text"/> 1388 <expand macro="estimator_params_text"/>
1246 </when> 1389 </when>
1390 <yield/>
1391 </xml>
1392
1393 <xml name="estimator_selector_all">
1394 <conditional name="estimator_selector">
1395 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
1396 <expand macro="estimator_module_options"/>
1397 </param>
1398 <expand macro="estimator_suboptions"/>
1399 </conditional>
1400 </xml>
1401
1402 <xml name="estimator_selector_fs">
1403 <conditional name="estimator_selector">
1404 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
1405 <expand macro="estimator_module_options">
1406 <option value="customer_estimator">Load a customer estimator</option>
1407 </expand>
1408 </param>
1409 <expand macro="estimator_suboptions">
1410 <when value="customer_estimator">
1411 <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the customer estimator or pipeline:"/>
1412 </when>
1413 </expand>
1247 </conditional> 1414 </conditional>
1248 </xml> 1415 </xml>
1249 1416
1250 <xml name="estimator_params_text" token_label="Type in parameter settings if different from default:" token_default_value='' 1417 <xml name="estimator_params_text" token_label="Type in parameter settings if different from default:" token_default_value=''
1251 token_help="Dictionary-capable, e.g., C=1, kernel='linear'. No double quotes. Leave this box blank for default estimator."> 1418 token_help="Dictionary-capable, e.g., C=1, kernel='linear'. No double quotes. Leave this box blank for default estimator.">
1371 <option value="ReliefF">ReliefF</option> 1538 <option value="ReliefF">ReliefF</option>
1372 <option value="SURF">SURF</option> 1539 <option value="SURF">SURF</option>
1373 <option value="SURFstar">SURFstar</option> 1540 <option value="SURFstar">SURFstar</option>
1374 <option value="MultiSURF">MultiSURF</option> 1541 <option value="MultiSURF">MultiSURF</option>
1375 <option value="MultiSURFstar">MultiSURFstar</option> 1542 <option value="MultiSURFstar">MultiSURFstar</option>
1376 <option value="TuRF">TuRF</option> 1543 <!--option value="TuRF">TuRF</option> -->
1377 </param> 1544 </param>
1378 <when value="ReliefF"> 1545 <when value="ReliefF">
1379 <expand macro="estimator_params_text" 1546 <expand macro="estimator_params_text"
1380 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False."/> 1547 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False."/>
1381 </when> 1548 </when>
1393 </when> 1560 </when>
1394 <when value="MultiSURFstar"> 1561 <when value="MultiSURFstar">
1395 <expand macro="estimator_params_text" 1562 <expand macro="estimator_params_text"
1396 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/> 1563 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/>
1397 </when> 1564 </when>
1398 <when value="TuRF"> 1565 <!--when value="TuRF">
1399 <expand macro="estimator_params_text" 1566 <expand macro="estimator_params_text"
1400 help="Default(=blank): core_algorithm='ReliefF', discrete_threshold=10, n_features_to_select=10, n_neighbors=100, pct=0.5, verbose=False."/> 1567 help="Default(=blank): core_algorithm='ReliefF', discrete_threshold=10, n_features_to_select=10, n_neighbors=100, pct=0.5, verbose=False."/>
1401 </when> 1568 </when> -->
1402 </conditional> 1569 </conditional>
1403 </xml> 1570 </xml>
1571
1572 <xml name="imbalanced_learn_sampling">
1573 <conditional name="imblearn_selector">
1574 <param name="select_algorithm" type="select" label="Choose the algorithm:">
1575 <option value="under_sampling.ClusterCentroids" selected="true">under_sampling.ClusterCentroids</option>
1576 <option value="under_sampling.CondensedNearestNeighbour">under_sampling.CondensedNearestNeighbour</option>
1577 <option value="under_sampling.EditedNearestNeighbours">under_sampling.EditedNearestNeighbours</option>
1578 <option value="under_sampling.RepeatedEditedNearestNeighbours">under_sampling.RepeatedEditedNearestNeighbours</option>
1579 <option value="under_sampling.AllKNN">under_sampling.AllKNN</option>
1580 <option value="under_sampling.InstanceHardnessThreshold">under_sampling.InstanceHardnessThreshold</option>
1581 <option value="under_sampling.NearMiss">under_sampling.NearMiss</option>
1582 <option value="under_sampling.NeighbourhoodCleaningRule">under_sampling.NeighbourhoodCleaningRule</option>
1583 <option value="under_sampling.OneSidedSelection">under_sampling.OneSidedSelection</option>
1584 <option value="under_sampling.RandomUnderSampler">under_sampling.RandomUnderSampler</option>
1585 <option value="under_sampling.TomekLinks">under_sampling.TomekLinks</option>
1586 <option value="over_sampling.ADASYN">over_sampling.ADASYN</option>
1587 <option value="over_sampling.RandomOverSampler">over_sampling.RandomOverSampler</option>
1588 <option value="over_sampling.SMOTE">over_sampling.SMOTE</option>
1589 <option value="over_sampling.SVMSMOTE">over_sampling.SVMSMOTE</option>
1590 <option value="over_sampling.BorderlineSMOTE">over_sampling.BorderlineSMOTE</option>
1591 <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option>
1592 <option value="combine.SMOTEENN">combine.SMOTEENN</option>
1593 <option value="combine.SMOTETomek">combine.SMOTETomek</option>
1594 </param>
1595 <when value="under_sampling.ClusterCentroids">
1596 <expand macro="estimator_params_text"
1597 help="Default(=blank): sampling_strategy='auto', random_state=None, estimator=None, voting='auto'."/>
1598 </when>
1599 <when value="under_sampling.CondensedNearestNeighbour">
1600 <expand macro="estimator_params_text"
1601 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1."/>
1602 </when>
1603 <when value="under_sampling.EditedNearestNeighbours">
1604 <expand macro="estimator_params_text"
1605 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'."/>
1606 </when>
1607 <when value="under_sampling.RepeatedEditedNearestNeighbours">
1608 <expand macro="estimator_params_text"
1609 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'."/>
1610 </when>
1611 <when value="under_sampling.AllKNN">
1612 <expand macro="estimator_params_text"
1613 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', allow_minority=False."/>
1614 </when>
1615 <when value="under_sampling.InstanceHardnessThreshold">
1616 <expand macro="estimator_params_text"
1617 help="Default(=blank): estimator=None, sampling_strategy='auto', random_state=None, cv=5."/>
1618 </when>
1619 <when value="under_sampling.NearMiss">
1620 <expand macro="estimator_params_text"
1621 help="Default(=blank): sampling_strategy='auto', random_state=None, version=1, n_neighbors=3, n_neighbors_ver3=3."/>
1622 </when>
1623 <when value="under_sampling.NeighbourhoodCleaningRule">
1624 <expand macro="estimator_params_text"
1625 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', threshold_cleaning=0.5."/>
1626 </when>
1627 <when value="under_sampling.OneSidedSelection">
1628 <expand macro="estimator_params_text"
1629 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1."/>
1630 </when>
1631 <when value="under_sampling.RandomUnderSampler">
1632 <expand macro="estimator_params_text"
1633 help="Default(=blank): sampling_strategy='auto', random_state=None, replacement=False."/>
1634 </when>
1635 <when value="under_sampling.TomekLinks">
1636 <expand macro="estimator_params_text"
1637 help="Default(=blank): sampling_strategy='auto', random_state=None."/>
1638 </when>
1639 <when value="over_sampling.ADASYN">
1640 <expand macro="estimator_params_text"
1641 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=5."/>
1642 </when>
1643 <when value="over_sampling.RandomOverSampler">
1644 <expand macro="estimator_params_text"
1645 help="Default(=blank): sampling_strategy='auto', random_state=None."/>
1646 </when>
1647 <when value="over_sampling.SMOTE">
1648 <expand macro="estimator_params_text"
1649 help="Default(=blank): sampling_strategy='auto', random_state=None, k_neighbors=5."/>
1650 </when>
1651 <when value="over_sampling.SVMSMOTE">
1652 <expand macro="estimator_params_text"
1653 help="Default(=blank): sampling_strategy='auto', k_neighbors=5, m_neighbors=10, out_step=0.5, random_state=None, svm_estimator=None."/>
1654 </when>
1655 <when value="over_sampling.BorderlineSMOTE">
1656 <expand macro="estimator_params_text"
1657 help="Default(=blank): sampling_strategy='auto', k_neighbors=5, kind='borderline-1', m_neighbors=10, random_state=None."/>
1658 </when>
1659 <when value="over_sampling.SMOTENC">
1660 <expand macro="estimator_params_text"
1661 help="Default: categorical_features=[], sampling_strategy='auto', random_state=None, k_neighbors=5."/>
1662 </when>
1663 <when value="combine.SMOTEENN">
1664 <expand macro="estimator_params_text"
1665 help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, enn=None."/>
1666 </when>
1667 <when value="combine.SMOTETomek">
1668 <expand macro="estimator_params_text"
1669 help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None."/>
1670 </when>
1671 </conditional>
1672 </xml>
1673
1404 <!-- Outputs --> 1674 <!-- Outputs -->
1405 1675
1406 <xml name="output"> 1676 <xml name="output">
1407 <outputs> 1677 <outputs>
1408 <data format="tabular" name="outfile_predict"> 1678 <data format="tabular" name="outfile_predict">
1496 keywords = {large-scale machine learning}, 1766 keywords = {large-scale machine learning},
1497 } 1767 }
1498 </citation> 1768 </citation>
1499 </xml> 1769 </xml>
1500 1770
1771 <xml name="imblearn_citation">
1772 <citation type="bibtex">
1773 @article{JMLR:v18:16-365,
1774 author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas},
1775 title = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
1776 journal = {Journal of Machine Learning Research},
1777 year = {2017},
1778 volume = {18},
1779 number = {17},
1780 pages = {1-5},
1781 url = {http://jmlr.org/papers/v18/16-365.html}
1782 }
1783 </citation>
1784 </xml>
1785
1501 </macros> 1786 </macros>