Mercurial > repos > bgruening > cleanlab
annotate cleanlab_issue_handler.xml @ 0:ecc18228c32e draft default tip
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
| author | bgruening |
|---|---|
| date | Wed, 28 May 2025 11:30:39 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
1 <tool id="cleanlab_issue_handler" name="Cleanlab Issue Handler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
2 <description>Detect and optionally clean data issues using Cleanlab</description> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
3 <macros> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
4 <token name="@TOOL_VERSION@">2.7.1</token> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
5 <token name="@VERSION_SUFFIX@">1.0</token> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
6 </macros> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
7 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
8 <requirements> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
9 <requirement type="package" version="@TOOL_VERSION@">cleanlab</requirement> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
10 <requirement type="package" version="3.6.0">datasets</requirement> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
11 <requirement type="package" version="3.0.0">xgboost</requirement> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
12 </requirements> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
13 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
14 <command detect_errors="exit_code"><![CDATA[ |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
15 python '${__tool_directory__}/cleanlab_issue_handler.py' |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
16 --input_file '$input_file' '$input_file.ext' |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
17 --target_column '$target_column' |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
18 --task '$task_block.task' |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
19 --method '$task_block.method' |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
20 #if $summary_only: |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
21 --summary |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
22 #end if |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
23 #if str($task_block.task) == "classification": |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
24 $task_block.label_issues |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
25 $task_block.outliers |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
26 $task_block.near_duplicates |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
27 $task_block.non_iid |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
28 #elif str($task_block.task) == "regression": |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
29 --quality-threshold '$task_block.quality_threshold' |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
30 #end if |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
31 ]]></command> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
32 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
33 <inputs> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
34 <param name="input_file" type="data" format="csv,tsv,tabular" label="Input data file"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
35 <param name="target_column" type="text" optional="false" label="Target column name" value="target" help="Name of the target column in the input data file. Default is 'target'."/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
36 <param name="summary_only" type="boolean" label="Only generate summary report?" checked="false"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
37 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
38 <conditional name="task_block"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
39 <param name="task" type="select" label="Task type"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
40 <option value="classification">Classification</option> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
41 <option value="regression">Regression</option> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
42 </param> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
43 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
44 <when value="classification"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
45 <param name="method" type="select" label="Cleaning method"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
46 <option value="remove">Remove problematic rows</option> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
47 <option value="replace">Replace problematic labels (classification only)</option> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
48 </param> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
49 <param name="label_issues" type="boolean" truevalue="" falsevalue="--no-label-issues" label="Remove/Replace label issues" checked="true"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
50 <param name="outliers" type="boolean" truevalue="" falsevalue="--no-outliers" label="Remove/Replace outlier issues" checked="true"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
51 <param name="near_duplicates" type="boolean" truevalue="" falsevalue="--no-near-duplicates" label="Remove/Replace near-duplicate issues" checked="true"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
52 <param name="non_iid" type="boolean" truevalue="" falsevalue="--no-non-iid" label="Remove/Replace non-IID issues" checked="true"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
53 </when> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
54 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
55 <when value="regression"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
56 <param name="method" type="select" label="Cleaning method"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
57 <option value="remove">Remove problematic rows</option> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
58 <!-- No "replace" option for regression --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
59 </param> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
60 <param name="quality_threshold" type="float" label="Quality threshold" value="0.2" min="0.0" max="1.0" help="Threshold for low-quality labels. Default is 0.2."/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
61 <!-- No issue type parameters shown --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
62 </when> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
63 </conditional> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
64 </inputs> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
65 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
66 <outputs> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
67 <data name="report_file" from_work_dir="summary.txt" format="txt" label="Issue Report"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
68 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
69 <data name="output_file" from_work_dir="cleaned_data" format_source="input_file" label="cleaned_${input_file.name}"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
70 <filter>not summary_only</filter> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
71 </data> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
72 </outputs> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
73 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
74 <tests> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
75 <!-- Test1: only summary --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
76 <test expect_num_outputs="1"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
77 <param name="input_file" value="breast_cancer.csv" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
78 <param name="target_column" value="target" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
79 <param name="summary_only" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
80 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
81 <conditional name="task_block"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
82 <param name="task" value="classification" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
83 <param name="method" value="remove" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
84 <param name="label_issues" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
85 <param name="outliers" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
86 <param name="near_duplicates" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
87 <param name="non_iid" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
88 </conditional> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
89 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
90 <output name="report_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
91 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
92 <has_text_matching expression="issue_type\s+score\s+num_issues"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
93 <has_text_matching expression="label\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
94 <has_text_matching expression="outlier\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
95 <has_text_matching expression="non_iid\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
96 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
97 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
98 </test> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
99 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
100 <!-- Test2: summary and cleaned all --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
101 <test expect_num_outputs="2"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
102 <param name="input_file" value="breast_cancer.csv" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
103 <param name="target_column" value="target" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
104 <param name="summary_only" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
105 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
106 <conditional name="task_block"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
107 <param name="task" value="classification" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
108 <param name="method" value="remove" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
109 <param name="label_issues" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
110 <param name="outliers" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
111 <param name="near_duplicates" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
112 <param name="non_iid" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
113 </conditional> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
114 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
115 <output name="report_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
116 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
117 <has_text_matching expression="issue_type\s+score\s+num_issues"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
118 <has_text_matching expression="label\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
119 <has_text_matching expression="outlier\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
120 <has_text_matching expression="non_iid\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
121 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
122 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
123 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
124 <output name="output_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
125 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
126 <has_text_matching expression=".*target.*"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
127 <has_text_matching expression="^.*,.+,.+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
128 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
129 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
130 </test> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
131 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
132 <!-- Test3: summary and cleaned label issues only --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
133 <test expect_num_outputs="2"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
134 <param name="input_file" value="breast_cancer.csv" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
135 <param name="target_column" value="target" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
136 <param name="summary_only" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
137 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
138 <conditional name="task_block"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
139 <param name="task" value="classification" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
140 <param name="method" value="remove" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
141 <param name="label_issues" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
142 <param name="outliers" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
143 <param name="near_duplicates" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
144 <param name="non_iid" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
145 </conditional> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
146 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
147 <output name="report_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
148 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
149 <has_text_matching expression="label\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
150 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
151 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
152 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
153 <output name="output_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
154 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
155 <has_text_matching expression=".*target.*"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
156 <has_text_matching expression="^.*,.+,.+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
157 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
158 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
159 </test> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
160 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
161 <!-- Test4: summary and cleaned outliers only --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
162 <test expect_num_outputs="2"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
163 <param name="input_file" value="breast_cancer.csv" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
164 <param name="target_column" value="target" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
165 <param name="summary_only" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
166 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
167 <conditional name="task_block"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
168 <param name="task" value="classification" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
169 <param name="method" value="remove" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
170 <param name="label_issues" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
171 <param name="outliers" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
172 <param name="near_duplicates" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
173 <param name="non_iid" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
174 </conditional> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
175 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
176 <output name="report_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
177 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
178 <has_text_matching expression="outlier\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
179 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
180 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
181 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
182 <output name="output_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
183 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
184 <has_text_matching expression=".*target.*"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
185 <has_text_matching expression="^.*,.+,.+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
186 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
187 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
188 </test> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
189 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
190 <!-- Test5: summary and clean all for tsv --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
191 <test expect_num_outputs="2"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
192 <param name="input_file" value="breast_cancer.tsv" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
193 <param name="target_column" value="target" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
194 <param name="summary_only" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
195 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
196 <conditional name="task_block"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
197 <param name="task" value="classification" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
198 <param name="method" value="remove" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
199 <param name="label_issues" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
200 <param name="outliers" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
201 <param name="near_duplicates" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
202 <param name="non_iid" value="true" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
203 </conditional> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
204 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
205 <output name="report_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
206 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
207 <has_text_matching expression="issue_type\s+score\s+num_issues"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
208 <has_text_matching expression="label\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
209 <has_text_matching expression="outlier\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
210 <has_text_matching expression="non_iid\s+(0(\.\d+)?|1(\.0+)?)\s+\d+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
211 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
212 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
213 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
214 <output name="output_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
215 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
216 <has_text_matching expression=".*target.*"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
217 <has_text_matching expression="^.*\t.+\t.+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
218 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
219 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
220 </test> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
221 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
222 <!-- Test6: regression with summary and cleaned output --> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
223 <test expect_num_outputs="2"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
224 <param name="input_file" value="reg_1027_ESL.csv" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
225 <param name="target_column" value="target" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
226 <param name="summary_only" value="false" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
227 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
228 <conditional name="task_block"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
229 <param name="task" value="regression" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
230 <param name="method" value="remove" /> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
231 </conditional> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
232 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
233 <output name="report_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
234 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
235 <has_text text="Regression Issue Summary:"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
236 <has_text_matching expression="Num low quality:"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
237 <has_text_matching expression="Mean label quality:"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
238 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
239 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
240 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
241 <output name="output_file"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
242 <assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
243 <has_text_matching expression=".*target.*"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
244 <has_text_matching expression="^.*,.+,.+"/> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
245 </assert_contents> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
246 </output> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
247 </test> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
248 </tests> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
249 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
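250 <help><![CDATA[ **What it does** This tool detects and optionally cleans data issues in tabular datasets using the Cleanlab (https://github.com/cleanlab/cleanlab) Python library. It supports **classification** and **regression** tasks and helps improve dataset quality by detecting label errors, outliers, near-duplicate entries, and non-IID samples. |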
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
254 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
255 The tool internally fits a cross-validated model (e.g., via XGBoost) to estimate label quality and identify problematic samples. These issues can be summarized in a report, and optionally addressed via removal or correction (depending on task and selected method). |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
256 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
257 -------------------- |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
258 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
259 **Detected Issue Types (with technical examples)** |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
260 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
261 - **Label Issues** |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
262 These are samples whose label in the dataset is likely incorrect. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
263 **Example**: In a medical classification dataset, a patient's record is labeled as "benign," but its feature pattern is highly similar to correctly labeled "malignant" cases. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
264 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
265 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
266 - **Outliers** |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
267 Points that are statistically distant from the rest of the dataset. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
268 **Example**: An entry with unusually high or low feature values (e.g., several standard deviations away from the mean). |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
269 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
270 - **Near-Duplicates** |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
271 Highly similar or repeated samples. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
272 **Example**: Two rows with nearly identical features and labels — possibly a duplication or copy artifact. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
273 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
274 - **Non-IID Samples** *(classification only)* |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
275 Samples that violate the assumption of independent and identically distributed data. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
276 **Example**: A subset from a different population source (e.g., a different hospital or device) introducing distributional shift. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
277 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
278 -------------------- |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
279 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
280 **Parameters** |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
281 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
282 - **Input file**: Tabular file (CSV/TSV) with a `target` column. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
283 - **Task type**: `classification` or `regression`. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
284 - **Method**: `remove` (delete problematic rows) or `replace` (correct labels — classification only). |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
285 - **Only report issues**: If checked, input data is unchanged; only a summary report is produced. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
286 - **Issue types**: Choose which issues to detect and handle. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
287 - **Quality threshold** *(regression only)*: A float between 0.0 and 1.0 that determines how aggressively the tool flags low-quality labels in regression. Labels with quality scores below this threshold will be removed. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
288 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
289 -------------------- |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
290 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
291 **Outputs** |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
292 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
293 - **summary.txt**: Report listing each issue type, confidence score, and number of affected rows. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
294 - **cleaned_data**: Cleaned dataset (CSV/TSV), only produced if "Only report issues" is unchecked. |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
295 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
296 ]]></help> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
297 |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
298 <citations> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
299 <citation type="bibtex"> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
300 @article{northcutt2021confident, |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
301 title={Confident learning: Estimating uncertainty in dataset labels}, |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
302 author={Northcutt, Curtis G and Jiang, Lu and Chuang, Isaac L}, |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
303 journal={Journal of Artificial Intelligence Research}, |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
304 year={2021}, |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
305 volume={70}, |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
306 pages={1373--1411} |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
307 } |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
308 </citation> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
309 </citations> |
|
ecc18228c32e
planemo upload for repository https://github.com/cleanlab/cleanlab commit ac4753a61ee908bc2a5953b6c6d38d2bbbacc6c0
bgruening
parents:
diff
changeset
|
310 </tool> |
