changeset 14:5d954f848f50 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 2adf8edc3de4e1cd3b299b26abb14544d17d0636"
author galaxyp
date Fri, 06 Nov 2020 18:20:41 +0000
parents 82c92f0efc58
children
files OMSSAAdapter.patch PepNovoAdapter.patch filetypes.txt fill_ctd.py fill_ctd_clargs.py generate-foo.sh generate.sh hardcoded_params.json macros.xml prepare_test_data_manual.sh readme.md test-data.sh
diffstat 12 files changed, 1480 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/OMSSAAdapter.patch	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,10 @@
+--- OMSSAAdapter.xml	2020-06-16 15:51:40.315400730 +0200
++++ /tmp/OMSSAAdapter.xml	2020-06-16 15:50:23.536086074 +0200
+@@ -22,6 +22,7 @@
+ mkdir database &&
+ ln -s '$database' 'database/${re.sub("[^\w\-_]", "_", $database.element_identifier)}.$gxy2omsext($database.ext)' &&
+ 
++makeblastdb -dbtype prot -in 'database/${re.sub("[^\w\-_]", "_", $database.element_identifier)}.$gxy2omsext($database.ext)' &&
+ ## Main program call
+ 
+ set -o pipefail &&
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PepNovoAdapter.patch	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,35 @@
+--- PepNovoAdapter.xml	2020-05-12 15:55:24.712831518 +0200
++++ /tmp/PepNovoAdapter.xml	2020-05-12 15:36:31.267276757 +0200
+@@ -42,8 +42,13 @@
+   </configfiles>
+   <inputs>
+     <param name="in" argument="-in" type="data" format="mzml" optional="false" label="input file" help=" select mzml data sets(s)"/>
+-    <param name="model_directory" argument="-model_directory" type="text" optional="false" value="" label="Name of the directory where the model files are kept" help="">
+-      <expand macro="list_string_san"/>
++    <param name="model_directory" argument="-model_directory" type="select" optional="false" label="Name of the directory where the model files are kept" help="">
++        <options from_data_table="pepnovo_models">
++            <column name="name" index="0"/>
++            <column name="value" index="2"/>
++            <filter type="unique_value" name="unique_set" column="0"/>
++            <validator type="no_options" message="No model directory available"/>
++        </options>
+     </param>
+     <param name="correct_pm" argument="-correct_pm" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Find optimal precursor mass and charge values" help=""/>
+     <param name="use_spectrum_charge" argument="-use_spectrum_charge" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Do not correct charge" help=""/>
+@@ -51,8 +56,14 @@
+     <param name="no_quality_filter" argument="-no_quality_filter" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Do not remove low quality spectra" help=""/>
+     <param name="fragment_tolerance" argument="-fragment_tolerance" type="float" optional="true" value="-1.0" label="The fragment tolerance (between 0 and 0.75 Da" help="Set to -1.0 to use model's default setting)"/>
+     <param name="pm_tolerance" argument="-pm_tolerance" type="float" optional="true" value="-1.0" label="The precursor mass tolerance (between 0 and 5.0 Da" help="Set to -1.0 to use model's default setting)"/>
+-    <param name="model" argument="-model" type="text" optional="true" value="CID_IT_TRYP" label="Name of the model that should be used" help="">
+-      <expand macro="list_string_san"/>
++    <param name="model" argument="-model" type="select" label="Name of the model that should be used" help="">
++        <options from_data_table="pepnovo_models">
++            <column name="name" index="1"/>
++            <column name="value" index="1"/>
++            <filter type="param_value" ref="model_directory" column="2"/>
++            <filter type="unique_value" column="1"/>
++            <validator type="no_options" message="No model available"/>
++        </options>
+     </param>
+     <param name="digest" argument="-digest" display="radio" type="select" optional="false" label="Enzyme used for digestion (default TRYPSIN)" help="">
+       <option value="TRYPSIN" selected="true">TRYPSIN</option>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filetypes.txt	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,85 @@
+# CTD type    # Galaxy type
+# the following lines need to be at the top in order to ensure 
+# correct translation Galaxy->CTD type for the ambiguous cases
+# (should only be relevant for the autogenerated tests [which 
+# do not set the ftype of the inputs])
+txt           txt
+tsv           tabular
+
+##analysisXML
+# XTandemAdapter output is named xml in OMS (which is too unspecific) and bioml in Galaxy .. so this is renamed via hardcoded parameters
+bioml         xml
+consensusXML  consensusxml
+# TODO csv is problematic, since csv often actually means tsv .. but not always
+csv           csv
+##dat
+dta           dta
+dta2d         dta2d
+edta          edta
+fa            fasta
+fas           fasta
+fasta         fasta
+FASTA         fasta
+featureXML    featurexml
+featurexml    featurexml
+# fid
+html          html
+HTML          html
+idXML         idxml
+##ini         txt
+json          json
+kroenik	      kroenik
+mascotXML     mascotxml
+mgf           mgf
+mrm           mrm
+ms            sirius.ms
+ms2           ms2
+msp           msp
+mzData        mzdata
+mzid          mzid
+# important to have mzML first, since LuciphorAdapter is case sensitive https://github.com/OpenMS/OpenMS/issues/4444
+mzML          mzml
+mzml          mzml
+mzq           mzq
+mzTab         mztab
+mzXML         mzxml
+novor         txt
+obo           obo
+# I guess this is the idXML output of omssa
+omssaXML      idxml
+osw           osw
+OSW           osw
+params        txt
+paramXML      paramxml
+fasta         peff
+peplist       peplist
+# TODO pep.xml should be removed with OMS 2.6 https://github.com/OpenMS/OpenMS/pull/4541 .. but still in the tests
+pep.xml       pepxml
+pepXML        pepxml
+png           png
+PNG           png
+protXML       protxml
+psms          psms
+# TODO implement or use
+# psq
+pqp           pqp
+qcML          qcml
+spec.xml      spec.xml
+splib         splib
+sqMass        sqmass
+tandem.xml    tandem
+trafoXML      trafoxml
+traML         traml
+TraML         traml
+tab           tabular
+## MOVED TO TOP txt           txt
+raw           thermo.raw
+## xls: SpectraSTSearchAdapter https://github.com/OpenMS/OpenMS/pull/4419
+xls           tsv
+XML           xml
+xml           xml
+xquest.xml    xquest.xml
+xsd           xml
+
+# TODO needs to be implemented, needs to be below xml in order that Galaxy->OMS mapping gives xml
+# cachedMzML    xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fill_ctd.py	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,196 @@
+import collections
+import json
+import operator
+import os
+import re
+import subprocess
+import sys
+from functools import reduce  # forward compatibility for Python 3
+
+from CTDopts.CTDopts import (
+    _Choices,
+    _InFile,
+    _Null,
+    _NumericRange,
+    CTDModel
+)
+
+
+def getFromDict(dataDict, mapList):
+    return reduce(operator.getitem, mapList, dataDict)
+
+
+def setInDict(dataDict, mapList, value):
+    getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
+
+
+def mergeDicts(d, e):
+    """
+    insert values from the dict e into dict d
+    no values of d are overwritten
+    """
+    for k, v in e.items():
+        if (k in d and isinstance(d[k], dict) and isinstance(e[k], collections.abc.Mapping)):
+            mergeDicts(d[k], e[k])
+        elif k not in d and not isinstance(e[k], collections.abc.Mapping):
+            d[k] = e[k]
+        else:
+            sys.stderr.write("fill_ctd.py: could not merge key %s for %s in %s" % (k, d, e))
+            sys.exit(1)
+
+
+def _json_object_hook_noenvlookup(d):
+    return _json_object_hook(d, envlookup=False)
+
+
+def _json_object_hook(d, envlookup=True):
+    """
+    wee helper to transform the json written by galaxy
+    while loading
+    - True/False (bool objects) -> "true"/"false" (lowercase string)
+    - data inputs with multiple and optional true give [None] if no file is given -> []
+    - None -> "" (empty string)
+    - replace bash expressions (if envlookup is True):
+      - environment variables (need to consist capital letters and _) by their value
+      - expressions
+    """
+    for k in d.keys():
+        # if type(d[k]) is bool:
+        #     d[k] = str(d[k]).lower()
+        # else
+        if type(d[k]) is list and len(d[k]) == 1 and d[k][0] is None:
+            d[k] = []
+        elif d[k] is None:
+            d[k] = ""
+        elif envlookup and type(d[k]) is str and d[k].startswith("$"):
+            m = re.fullmatch(r"\$([A-Z_]+)", d[k])
+            if m:
+                d[k] = os.environ.get(m.group(1), "")
+                continue
+            m = re.fullmatch(r"\$(\{[A-Z_]+):-(.*)\}", d[k])
+            if m:
+                d[k] = os.environ.get(m.group(1), m.group(2))
+                continue
+
+            try:
+                p = subprocess.run("echo %s" % d[k], shell=True, check=True, stdout=subprocess.PIPE, encoding="utf8")
+                d[k] = p.stdout.strip()
+            except subprocess.CalledProcessError:
+                sys.stderr.write("fill_ctd error: Could not evaluate %s" % d[k])
+                continue
+    return d
+
+
+def qstring2list(qs):
+    """
+    transform a space separated string that is quoted by " into a list
+    """
+    lst = list()
+    qs = qs.split(" ")
+    quoted = False
+    for p in qs:
+        if p == "":
+            continue
+        if p.startswith('"') and p.endswith('"'):
+            lst.append(p[1:-1])
+        elif p.startswith('"'):
+            quoted = True
+            lst.append(p[1:] + " ")
+        elif p.endswith('"'):
+            quoted = False
+            lst[-1] += p[:-1]
+        else:
+            if quoted:
+                lst[-1] += p + " "
+            else:
+                lst.append(p)
+    return lst
+
+
+def fix_underscores(args):
+    if type(args) is dict:
+        for k in list(args.keys()):
+            v = args[k]
+            if type(v) is dict:
+                fix_underscores(args[k])
+            if k.startswith("_"):
+                args[k[1:]] = v
+                del args[k]
+    elif type(args) is list:
+        for i, v in enumerate(args):
+            if type(v) is dict:
+                fix_underscores(args[i])
+
+
+# command line: fill_ctd.py CTD_FILE USER_PARAMS_JSON HARDCODED_PARAMS_JSON
+# the CTD file is read and finally overwritten with the filled in values
+input_ctd = sys.argv[1]
+
+# load user specified parameters from json
+with open(sys.argv[2]) as fh:
+    args = json.load(fh, object_hook=_json_object_hook_noenvlookup)
+
+# load hardcoded parameters from json
+with open(sys.argv[3]) as fh:
+    hc_args = json.load(fh, object_hook=_json_object_hook)
+
+# insert the hc_args into the args
+mergeDicts(args, hc_args)
+
+# lift the entries of the advanced options conditional to the top level
+if "adv_opts_cond" in args:
+    args.update(args["adv_opts_cond"])
+    del args["adv_opts_cond"]
+
+# IDMapper has in and spectra:in params, in is used in out as format_source,
+# which does not work in Galaxy: https://github.com/galaxyproject/galaxy/pull/9493
+# therefore hardcoded params change the name of spectra:in to spectra:_in
+# which is corrected here again
+# TODO remove once PR is in and adapt profile accordingly
+fix_underscores(args)
+
+model = CTDModel(from_file=input_ctd)
+
+# transform values from json that correspond to
+# - old style booleans (string + restrictions) -> transformed to a str
+# - new style booleans that get a string (happens for hidden parameters [-test])
+#   are transformed to a bool
+# - unrestricted ITEMLIST which are represented as strings
+#   (quoted by " and space separated) in Galaxy -> transform to lists
+# - optional data input parameters that have defaults and for which no
+#   value is given -> overwrite with the default
+for p in model.get_parameters():
+
+    # check if the parameter is in the arguments from the galaxy tool
+    # (from the json file(s)), since advanced parameters are absent
+    # if the conditional is set to basic parameters
+    try:
+        getFromDict(args, p.get_lineage(name_only=True))
+    except KeyError:
+        # few tools use dashes in parameters which are automatically replaced
+        # by underscores by Galaxy. in these cases the dictionary needs to be
+        # updated (better: then dash and the underscore variant are in the dict)
+        # TODO might be removed later https://github.com/OpenMS/OpenMS/pull/4529
+        try:
+            lineage = [_.replace("-", "_") for _ in p.get_lineage(name_only=True)]
+            val = getFromDict(args, lineage)
+        except KeyError:
+            # neither variant is present: nothing to transform for this parameter
+            continue
+        else:
+            setInDict(args, p.get_lineage(name_only=True), val)
+
+    if p.type is str and type(p.restrictions) is _Choices and set(p.restrictions.choices) == set(["true", "false"]):
+        v = getFromDict(args, p.get_lineage(name_only=True))
+        setInDict(args, p.get_lineage(name_only=True), str(v).lower())
+    elif p.type is bool:
+        v = getFromDict(args, p.get_lineage(name_only=True))
+        if isinstance(v, str):
+            v = (v.lower() == "true")
+            setInDict(args, p.get_lineage(name_only=True), v)
+    elif p.is_list and (p.restrictions is None or type(p.restrictions) is _NumericRange):
+        v = getFromDict(args, p.get_lineage(name_only=True))
+        if type(v) is str:
+            setInDict(args, p.get_lineage(name_only=True), qstring2list(v))
+    elif p.type is _InFile and not (p.default is None or type(p.default) is _Null):
+        v = getFromDict(args, p.get_lineage(name_only=True))
+        if v in [[], ""]:
+            setInDict(args, p.get_lineage(name_only=True), p.default)
+
+# write the filled in values back to the input CTD file
+model.write_ctd(input_ctd, arg_dict=args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fill_ctd_clargs.py	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+from argparse import ArgumentParser
+from io import StringIO
+
+from CTDopts.CTDopts import (
+    CTDModel,
+    ModelTypeError,
+    Parameters
+)
+
+if __name__ == "__main__":
+    # note add_help=False since otherwise arguments starting with -h will
+    # trigger an error (despite allow_abbreviate)
+    parser = ArgumentParser(prog="fill_ctd_clargs",
+                            description="fill command line arguments"
+                            "into a CTD file and write the CTD file to",
+                            add_help=False, allow_abbrev=False)
+    parser.add_argument("--ctd", dest="ctd", help="input ctd file",
+                        metavar='CTD', default=None, required=True)
+    args, cliargs = parser.parse_known_args()
+    # load CTDModel
+    model = None
+    try:
+        model = CTDModel(from_file=args.ctd)
+    except ModelTypeError:
+        pass
+    try:
+        model = Parameters(from_file=args.ctd)
+    except ModelTypeError:
+        pass
+    assert model is not None, "Could not parse %s, seems to be no CTD/PARAMS" % (args.ctd)
+
+    # get a dictionary of the ctd arguments where the values of the parameters
+    # given on the command line are overwritten
+    margs = model.parse_cl_args(cl_args=cliargs, ignore_required=True)
+
+    # write the ctd with the values taken from the dictionary
+    out = StringIO()
+    ctd_tree = model.write_ctd(out, margs)
+    print(out.getvalue())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generate-foo.sh	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,222 @@
+#!/usr/bin/env bash
+
+# parse test definitions from OpenMS sources for a tool with a given id
+#
+# Arguments: $1 - id of the tool
+# Globals:   OPENMSGIT, CTDCONVERTER, FILETYPES, VERSION (read)
+#            id, CMAKE (set, not declared local)
+# Outputs:   stdout: an <xml name="autotest_ID"> element that contains the
+#            filtered output of convert.py --test-only for each used test
+#            stderr: progress and information on skipped tests
+function get_tests2 {
+    id=$1
+    >&2 echo "generate tests for $id"
+    echo '<xml name="autotest_'"$id"'">'
+
+    # get the tests from the CMakeLists.txt
+    # 1st remove some tests
+    # - OpenSwathMzMLFileCacher with -convert_back argument https://github.com/OpenMS/OpenMS/issues/4399
+    # - IDRipper PATH gets empty causing problems. TODO But overall the option needs to be handled differently
+    # - several tools with duplicated input (leads to conflict when linking)
+    # - TOFCalibration inputs without extension (also in prepare_test_data) https://github.com/OpenMS/OpenMS/pull/4525
+    # - MaRaCluster with -consensus_out (parameter blacklisted: https://github.com/OpenMS/OpenMS/issues/4456)
+    # - FileMerger with mixed dta dta2d input (ftype can not be specified in the test, dta can not be sniffed)
+    # - some input files are originally in a subdir (degenerate_cases/), but not in test-data
+    # - SeedListGenerator: https://github.com/OpenMS/OpenMS/issues/4404
+    # - OpenSwathAnalyzer 9/10: cachedMzML (not supported yet)
+    # - FeatureFinderIdentification name clash of two tests https://github.com/OpenMS/OpenMS/pull/5002
+    # - TODO SiriusAdapter https://github.com/OpenMS/OpenMS/pull/5010
+    CMAKE=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake  |
+        sed 's@${DATA_DIR_SHARE}/@@g' |
+        grep -v 'OpenSwathMzMLFileCacher .*-convert_back' |
+        sed 's/${TMP_RIP_PATH}/""/' |
+        grep -v "MaRaClusterAdapter.*-consensus_out"|
+        grep -v "FileMerger_1_input1.dta2d.*FileMerger_1_input2.dta " |
+        sed 's@degenerate_cases/@@g' |
+        grep -v 'TOPP_SeedListGenerator_3"' | 
+        egrep -v 'TOPP_OpenSwathAnalyzer_test_3"|TOPP_OpenSwathAnalyzer_test_4"' |
+	egrep -v '"TOPP_FeatureFinderIdentification_4"' | 
+	sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/')
+
+
+#         grep -v 'FileFilter.*-spectra:select_polarity ""' |
+#         grep -v 'MassTraceExtractor_2.ini ' |
+#         grep -v "FileMerger_6_input2.mzML.*FileMerger_6_input2.mzML" |
+#         grep -v "IDMerger_1_input1.idXML.*IDMerger_1_input1.idXML" |
+#         grep -v "degenerated_empty.idXML.*degenerated_empty.idXML" |
+#         grep -v "FeatureLinkerUnlabeledKD_1_output.consensusXML.*FeatureLinkerUnlabeledKD_1_output.consensusXML" |
+#         grep -v "FeatureLinkerUnlabeledQT_1_output.consensusXML.*FeatureLinkerUnlabeledQT_1_output.consensusXML" |
+
+    # 1st part is a dirty hack to join lines containing a single function call, e.g.
+    # addtest(....
+    #         ....)
+    echo "$CMAKE" | sed 's/#.*//; s/^\s*//; s/\s*$//' | grep -v "^#" | grep -v "^$"  | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | 
+        grep -iE "add_test\(\"(TOPP|UTILS)_.*/$id " | egrep -v "_prepare\"|_convert|WRITEINI|WRITECTD|INVALIDVALUE"  | while read -r line
+    do
+        # reduce the add_test call to: TEST_ID TOOL_ID ARGUMENTS...
+        line=$(echo "$line" | sed 's/add_test("\([^"]\+\)"/\1/; s/)$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g')
+        # >&2 echo $line
+        test_id=$(echo "$line" | cut -d" " -f 1)
+        tool_id=$(echo "$line" | cut -d" " -f 2)
+        if [[ $test_id =~ _out_?[0-9]? ]]; then
+            >&2 echo "    skip $test_id $line"
+            continue
+        fi
+        # case insensitive comparison of the requested and the called tool
+        if [[ ${id,,} != ${tool_id,,} ]]; then
+            >&2 echo "    skip $test_id ($id != $tool_id) $line"
+            continue
+        fi
+
+        #remove tests with set_tests_properties(....PROPERTIES WILL_FAIL 1)
+        if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then
+            >&2 echo "    skip failing "$test_id
+            continue
+        fi
+        tes="  <test>\n"
+        line=$(fix_tmp_files "$line")
+        line=$(unique_files "$line")
+        # >&2 echo $line
+        #if there is an ini file then we use this to generate the test
+        #otherwise the ctd file is used
+        #other command line parameters are inserted later into this xml
+        if grep -lq "\-ini" <<<"$line"; then
+            ini=$(echo $line | sed 's/.*-ini \([^ ]\+\).*/\1/')
+            ini="test-data/$ini"
+        else
+            ini="ctd/$tool_id.ctd"
+        fi
+        # arguments of the tool call (fields 3 and later) without the -ini option
+        cli=$(echo $line |cut -d" " -f3- | sed 's/-ini [^ ]\+//')
+
+        ctdtmp=$(mktemp)
+        #echo python3 fill_ctd_clargs.py --ctd $ini $cli
+        # using eval: otherwise for some reason quoted values are not used properly ('A B' -> ["'A", "B'"])
+        # >&2 echo "python3 fill_ctd_clargs.py --ctd $ini $cli"
+        eval "python3 fill_ctd_clargs.py --ctd $ini $cli" > "$ctdtmp"
+        # echo $ctdtmp
+        # >&2 cat $ctdtmp
+        testtmp=$(mktemp)
+        python3 $CTDCONVERTER/convert.py galaxy -i $ctdtmp -o $testtmp -s tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf  -p hardcoded_params.json --tool-version $VERSION --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib > /dev/null
+        # lines with an output element that has an empty file attribute are dropped
+        cat $testtmp | grep -v '<output.*file=""' # | grep -v 'CHEMISTRY/'
+        rm $ctdtmp $testtmp
+
+        #> /dev/null
+
+        #rm $testtmp
+    done 
+    echo '</xml>'
+}
+
+#some tests use the same file twice which does not work in planemo tests
+#hence we create symlinks for each file used twice
+function unique_files {
+    line=$@
+    for arg in $@
+    do
+        if [[ ! -f "test-data/$arg" ]]; then
+            continue
+        fi
+        cnt=$(grep -c $arg <<< $(echo "$line" | tr ' ' '\n'))
+        while [[ $cnt -gt 1 ]]; do
+            new_arg=$(echo $arg | sed "s/\(.*\)\./\1_$cnt./")
+            ln -fs $arg test-data/$new_arg
+            line=$(echo $line | sed "s/\($arg.*\)$arg/\1$new_arg/")
+            cnt=$(grep -c $arg <<< $(echo "$line" | tr ' ' '\n'))
+        done
+    done
+
+    echo $line
+}
+
+# options of out_type selects need to be fixed to Galaxy data types
+function fix_out_type {
+    grep "^$1" "$2" | awk '{print $2}'
+}
+
+#OpenMS tests output to tmp files and compare with FuzzyDiff to the expected file.
+#problem: the extension of the tmp files is unusable for test generation.
+#unfortunately the extensions used in the DIFF lines are not always usable for the CLI
+#(e.g. for prepare_test_data, e.g. CLI expects csv but test file is txt)
+#this function replaces the tmp file by the expected file.
+#
+# Arguments: the command line of a test (may be given as a single argument,
+#            it is split at whitespace)
+# Globals:   OPENMSGIT (read)
+# Outputs:   the command line with replaced tmp files on stdout
+#            (each word is preceded by a space)
+function fix_tmp_files {
+    local ret="" cmake="" a g in1 in2
+    # $@ is intentionally unquoted: the command line needs to be split into words
+    for a in $@; do
+        # only arguments with the extension .tmp are candidates
+        if [[ ! $a =~ \.tmp$ ]]; then
+            ret="$ret $a"
+            continue
+        fi
+        # join lines containing a single function call (only once, on first use)
+        if [[ -z "$cmake" ]]; then
+            cmake=$(cat "$OPENMSGIT/src/tests/topp/CMakeLists.txt" "$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake" | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g')
+        fi
+        # the DIFF call(s) that mention the tmp file
+        g=$(grep '\${DIFF}.*'"$a" <<<"$cmake")
+        in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<<"$g")
+        # the tmp file is only replaced if it is the 1st input of the DIFF call
+        if [[  "$a" != "$in1" ]]; then
+            ret="$ret $a"
+            continue
+        fi
+        in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<<"$g")
+        in2=$(basename "$in2" | sed 's/)$//')
+        if [[ -f "test-data/$in2" ]]; then
+            ln -fs "$in1" "test-data/$in2"
+            ret="$ret $in2"
+        else
+            ret="$ret $a"
+        fi
+    done
+    echo "$ret"
+}
+
+function link_tmp_files {
+    # note this also considers commented lines (starting with a #)
+    # because of tests where the diff command is commented and we
+    # still want to use the extension of these files
+    cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | sed 's/^\s*//; s/\s*$//' | grep -v "^$"  | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | grep "\${DIFF}" | while read -r line
+    do
+        in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<<$line)
+        in1=$(basename $in1 | sed 's/)$//')
+        in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<<$line)
+        in2=$(basename $in2 | sed 's/)$//')
+        if [[ "$in1" == "$in2" ]]; then
+            >&2 echo "not linking equal $in1 $in2"
+            continue
+        fi
+        ln -f -s $in1 test-data/$in2
+    done
+    for i in test-data/*.tmp
+    do
+        if [ ! -e test-data/$(basename $i .tmp) ]; then
+            ln -s $(basename $i) test-data/$(basename $i .tmp)
+            #ln -s $(basename $i) test-data/$(basename $i .tmp)
+        else
+            ln -fs $(basename $i) test-data/$(basename $i .tmp)
+        fi
+    done
+}
+
+
+
+# parse data preparation calls from OpenMS sources for a tool with a given id
+function prepare_test_data {
+#     id=$1
+# | egrep -i "$id\_.*[0-9]+(_prepare\"|_convert)?"
+
+# TODO SiriusAdapter https://github.com/OpenMS/OpenMS/pull/5010
+    cat $OPENMSGIT/src/tests/topp/CMakeLists.txt  $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | sed 's/#.*$//'| sed 's/^\s*//; s/\s*$//' | grep -v "^$"  | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | 
+        sed 's/degenerate_cases\///' | 
+        egrep -v "WRITEINI|WRITECTD|INVALIDVALUE|DIFF" | 
+        grep add_test | 
+        egrep "TOPP|UTILS" |
+        sed 's@${DATA_DIR_SHARE}/@@g;'|
+        sed 's@${TMP_RIP_PATH}@dummy2.tmp@g'|
+        sed 's@TOFCalibration_ref_masses @TOFCalibration_ref_masses.txt @g; s@TOFCalibration_const @TOFCalibration_const.csv @'| 
+	sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' |
+    while read line
+    do
+        test_id=$(echo "$line" | sed 's/add_test(//; s/"//g;  s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f1)
+
+        if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then
+            >&2 echo "    skip failing "$test_id
+            continue
+        fi
+
+        line=$(echo "$line" | sed 's/add_test("//; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f2-)
+        # line="$(fix_tmp_files $line)"
+        echo 'echo executing "'$test_id'"'
+	echo "$line > $test_id.stdout 2> $test_id.stderr"
+        echo "if [[ \"\$?\" -ne \"0\" ]]; then >&2 echo '$test_id failed'; >&2 echo -e \"stderr:\n\$(cat $test_id.stderr | sed 's/^/    /')\"; echo -e \"stdout:\n\$(cat $test_id.stdout)\";fi"    
+    done
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generate.sh	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# generate the Galaxy tool xml files from the ctd files:
+# test-data.sh is run, CTDConverter is cloned (if absent) and run on ctd/*ctd
+# and finally the manual patches are applied to the generated xml files
+
+VERSION=2.6
+FILETYPES="filetypes.txt"
+PROFILE="20.05"
+## FILETYPES_RE=$(grep -v "^#" $FILETYPES | grep -v "^$" | cut -f 1 -d" " | tr '\n' '|' | sed 's/|$//'| sed 's/|/\\|/g')
+
+# assignment and export are separated, otherwise a failure of mktemp is masked
+tmp=$(mktemp -d) || { >&2 echo 'could not create temporary directory'; exit 1; }
+export tmp
+export CTDCONVERTER="$tmp/CTDConverter"
+
+###############################################################################
+## reset old data
+###############################################################################
+# rm $(ls *xml |grep -v macros)
+# rm -rf ctd
+# mkdir -p ctd
+# echo "" > prepare_test_data.sh
+
+###############################################################################
+## generate tests
+## also creates 
+## - conda environment (for executing the binaries) and 
+## - the git clone of OpenMS (for generating the tests)
+## - ctd files
+###############################################################################
+bash ./test-data.sh ./macros_autotest.xml
+
+###############################################################################
+## get the 
+## - conda package (for easy access and listing of the OpenMS binaries), 
+###############################################################################
+# if [ ! -d $OPENMSPKG ]; then
+# 	mkdir $OPENMSPKG/
+# 	wget -P $OPENMSPKG/ "$CONDAPKG"
+# 	tar -xf $OPENMSPKG/"$(basename $CONDAPKG)" -C OpenMS$VERSION-pkg/
+#   rm $OPENMSPKG/"$(basename $CONDAPKG)"
+# fi
+
+###############################################################################
+## Get python libraries for CTD -> Galaxy conversion
+## TODO fix to main repo OR conda package if PRs are merged 
+###############################################################################
+# if [ ! -d CTDopts ]; then
+# 	# git clone https://github.com/genericworkflownodes/CTDopts CTDopts
+# 	git clone -b topic/no-1-2x https://github.com/bernt-matthias/CTDopts CTDopts
+# fi
+if [[ ! -d "$CTDCONVERTER" ]]; then
+	#git clone https://github.com/WorkflowConversion/CTDConverter.git CTDConverter
+	git clone -b topic/cdata https://github.com/bernt-matthias/CTDConverter.git "$CTDCONVERTER"
+fi
+# export PYTHONPATH=$(pwd)/CTDopts
+###############################################################################
+## conversion ctd->xml 
+###############################################################################
+
+# remove the previously generated tool xml files (names start with a capital
+# letter), -maxdepth 1 is needed: with -maxdepth 0 only . itself is tested
+# and nothing is ever deleted
+find . -maxdepth 1 -type f -name "[A-Z]*xml" -delete
+source "$(dirname "$(which conda)")/../etc/profile.d/conda.sh"
+conda activate "$tmp/OpenMS$VERSION-env"
+python "$CTDCONVERTER/convert.py" galaxy -i ctd/*ctd -o ./ -s tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf  -p hardcoded_params.json --test-macros macros_autotest.xml --test-macros-prefix autotest_  --test-macros macros_test.xml --test-macros-prefix manutest_ --tool-version "$VERSION" --tool-profile "$PROFILE" > convert.out 2> convert.err
+if [[ "$?" -ne "0" ]]; then >&2 echo 'CTD -> XML conversion failed'; >&2 echo -e "stderr:\n$(cat convert.err)"; fi
+conda deactivate
+
+# manual changes of the generated tools
+patch PepNovoAdapter.xml < PepNovoAdapter.patch
+patch OMSSAAdapter.xml < OMSSAAdapter.patch
+
+# https://github.com/OpenMS/OpenMS/pull/4984
+sed -i -e 's@http://www.openms.de/documentation/@http://www.openms.de/doxygen/release/2.6.0/html/@' ./*xml
+# https://github.com/OpenMS/OpenMS/pull/4984#issuecomment-702641976
+patch -p0 <404-urls.patch
+
+# #-b version log debug test in_type executable pepnovo_executable param_model_directory rt_concat_trafo_out param_id_pool
+
+# for i in A-E F-H I-L M-N O-P Q-Z
+# do
+# 	planemo t [$i]*xml --galaxy_branch release_20.05 --galaxy_python_version 3.7 --test_output $i.html --test_output_json $i.json &
+# done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hardcoded_params.json	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,236 @@
+{
+	"#": "blacklist parameters",
+
+	"version": [{"value": "@"}],
+	"debug": [{"value": "@"}],
+	"algorithm:debug": [{"value": "@"}],
+	"java_memory": [{"value": "@"}],
+	"java_permgen": [{"value": "@"}],
+	"#": "type of input is always determined from the file extension ",
+	"in_type": [{"value": "@"}],
+
+	"#": "tool specific blacklist parameters",
+
+	"convert_back": [{"value": "@", "tools": ["OpenSwathMzMLFileCacher"]}],
+	"NET_executable": [{
+			"value": "@", 
+			"tools": ["FileConverter"]
+	}],
+
+
+	"params_file": [{"value": "@", "tools": ["SpectraSTSearchAdapter"]}],
+
+	"#": "TODO not usable in 2.5 https://github.com/OpenMS/OpenMS/issues/4456, corresponding test currently disabled",
+    	"consensus_out": [{"value": "@", "tools": ["MaRaClusterAdapter"]}],
+	"#": "TODO would need treatment as prefix-output",
+	"output_directory": [{"value": "@", "tools": ["MaRaClusterAdapter"]}],
+
+	"#": "hardcode parameter values",
+
+	"comet_version": [{
+		"value":"2016.01 rev. 3"
+	}],
+	"comet_executable": [{
+		"value":"comet"
+	}],
+	"crux_executable": [{
+		"value": "crux"
+	}],
+	"fido_executable": [{
+		"value":"Fido"
+	}],
+	"fidocp_executable": [{
+		"value":"FidoChooseParameters"
+	}],
+	"maracluster_executable": [{
+		"value":"/home/berntm/projects/tools-galaxyp/tools/openms/OpenMS2.5.0-git/THIRDPARTY/Linux/64bit/MaRaCluster/maracluster"
+	}],
+	"mascot_directory": [{
+		"value":"TODO"
+	}],
+	"myrimatch_executable": [{
+		"value":"myrimatch"
+	}],
+	"omssa_executable": [{
+		"value":"$(dirname $(realpath $(which omssacl)))/omssacl"
+	}],
+	"ThermoRaw_executable": [{
+		"value": "ThermoRawFileParser.exe", 
+		"tools": ["FileConverter"]
+	}],
+	"pepnovo_executable": [{
+		"value":"pepnovo"
+	}],
+	"percolator_executable": [{
+		"value":"percolator"
+	}],
+	"xtandem_executable": [{
+		"value":"xtandem"
+	}],
+	"executable": [
+		{
+			"value":"$(dirname $(realpath $(which luciphor2)))/luciphor2.jar",
+			"tools": ["LuciphorAdapter"]
+		}, {
+			"value":"/home/berntm/Downloads/MSFragger-20171106/MSFragger-20171106.jar",
+			"tools": ["MSFraggerAdapter"]
+		}, {
+			"value":"$(msgf_plus -get_jar_path)",
+			"tools": ["MSGFPlusAdapter"]
+		}, {
+			"value": "/home/berntm/Downloads/novor/lib/novor.jar",
+			"tools": ["NovorAdapter"]
+		}, {
+			"value":"$(which sirius)",
+			"tools": ["SiriusAdapter", "AssayGeneratorMetabo"]
+		}, {
+			"value":"spectrast",
+			"tools": ["SpectraSTSearchAdapter"]
+		}
+	],
+	"r_executable": [{
+		"value":"R"
+	}],
+	"rscript_executable": [{
+		"value":"Rscript"
+	}],
+	"java_executable": [{
+		"value":"java"
+	}],
+	"log": [{
+		"value":"log.txt"
+	}],
+	"tempDirectory": [{
+		"value":"$TMP_DIR"
+	}],
+	"temp_data_directory": [{
+		"value":"$TMP_DIR"
+	}],
+	"algorithm:Preprocessing:tmp_dir": [{
+		"value":"$TMP_DIR"
+	}],
+	"no_progress": [{
+		"value": true
+	}],
+	"#": "only used in LuciphorAdapter at the moment, inconsistency will be fixed",
+	"num_threads": [{
+		"value":"${GALAXY_SLOTS:-1}"
+	}],
+	"threads": [{
+		"value": "${GALAXY_SLOTS:-1}"
+	}],
+	"sirius:cores": [{
+		"value": "${GALAXY_SLOTS:-1}"
+	}],
+
+	"#": "hardcode the outer loop threads for OpenSwathWorkflow",
+	"outer_loop_threads": [{
+			"value": "1", 
+			"tools": ["OpenSwathWorkflow"]
+	}],
+	"separator": [{
+		"value": ",",
+		"tools": ["IDMassAccuracy"]
+	}],
+
+	"#": "don't alow to copy data internally to save computation time for reloading",
+	"copy_data": [{
+		"value": "false",
+		"tools": ["MapAlignerTreeGuided"]
+	}],
+
+	"#": "overwrite/add Galaxy xml attributes of some parameters (names need to start with param_)",
+
+	"#": "test is not a hardcoded value since we need to set it in the tool tests", 
+	"test": [{
+		"CTD:type": "text",
+		"XML:type": "hidden"
+	}],
+
+	"#": "overwrite CTD attributes of some parameters (some are not possible, e.g. type)",
+
+	"#": "for some tools the user needs to select the desired output type since detection by extension makes no sense for galaxy tools",
+	"out_type": [{
+	    "CTD:required": true,
+	    "CTD:advanced": false
+	}],
+
+	"#": "SeedListGenerator with consensusXML input needs a dynamic number of outputs that depends on the content of the input, so we remove this options at the moment because its hard or impossible to implement in Galaxy, https://github.com/OpenMS/OpenMS/issues/4404 .. see also in parameter",
+	"#": "FileInfo, MapStatistics, SequenceCoverageCalculator wo -out just writes to stdout. not wanted here",
+	"#": "MzMLSplitter output prefix https://github.com/OpenMS/OpenMS/issues/4404",
+	"#": "IDRipper: blacklist out (is doing the same as the output-prefix out-path)",
+	"out": [{
+		"CTD:is_list": false, 
+		"tools": ["SeedListGenerator"]
+	}, {
+	 	"CTD:required": true,
+		"tools": ["FileInfo", "MapStatistics", "SequenceCoverageCalculator"]
+	}, {
+		"CTD:type": "output-prefix", 
+		"CTD:required": true,
+		"CTD:restrictions": "mzml",
+		"tools": ["MzMLSplitter"]
+	}, {
+		"value": "@", 
+		"tools": ["IDRipper"]
+	}],
+
+	"#": "Try to remove xml data type whereever possible",
+	"#": "XTandem Adapter output is called .xml in OMS which is to unspecific -> use Galaxy's bioml",
+	"xml_out": [{
+		"CTD:restrictions": "bioml",
+		"tools": ["XTandemAdapter"]
+	}],
+	
+	"#": "IDFileConverter remove xml",
+	"#": "OpenSwathWorkflow make in single file input and all outputs non-optional",
+        "#": "XFDR does not need xml .. redundant with xquest.xml TODO check if list is up to date with each new release",
+	"#": "SeedListGenerator: remove consensusXML https://github.com/OpenMS/OpenMS/issues/4404 .. see also out parameter",
+	"in": [{
+		"CTD:restrictions": "pepXML,protXML,mascotXML,omssaXML,bioml,psms,tsv,idXML,mzid,xquest.xml",
+		"tools": ["IDFileConverter"]
+	}, {
+		"CTD:is_list": false, 
+		"tools": ["OpenSwathWorkflow"]
+	}, {
+		"CTD:restrictions": "idXML,mzid,xquest.xml",
+		"tools": ["XFDR"]
+	}, {
+		"CTD:restrictions": "mzML,idXML,featureXML",
+		"tools": ["SeedListGenerator"]
+	}],
+
+	"#": "IDMapper has in and spectra:in params, in is used in out as format_source",
+	"#": "which does not work in Galaxy: https://github.com/galaxyproject/galaxy/pull/9493", 
+	"spectra:in": [{
+		"CTD:name": "_in", 
+		"tools": ["IDMapper"]
+	}],
+
+	"#": "hardcoding prefix parameters which are not yet available in OMS but in CTDOpts https://github.com/OpenMS/OpenMS/pull/4527",
+	"#": "output-prefix",
+	"out_path": [{
+		"CTD:type": "output-prefix", 
+		"CTD:required": true,
+		"CTD:restrictions": "idXML",
+		"tools": ["IDRipper"]
+	}],
+	"outputDirectory": [{
+		"CTD:type": "output-prefix", 
+		"CTD:advanced": false,
+		"CTD:required": true,
+		"CTD:restrictions": "mzml",
+		"tools": ["OpenSwathFileSplitter"]
+	}],
+
+	"#": "OpenSwathDIAPreScoring: https://github.com/OpenMS/OpenMS/pull/4443",
+        "#": "SpectraSTSearchAdapter does not need xml .. redundant with pep.xml TODO check if list is up to date with each new release",
+	"output_files": [{
+		"CTD:required": true,
+		"tools": ["OpenSwathDIAPreScoring"]
+	}, {
+		"CTD:restrictions": "txt,tsv,pep.xml,pepXML,html",
+		"tools": ["SpectraSTSearchAdapter"]
+	
+	}]
+}
--- a/macros.xml	Sat Oct 31 10:12:41 2020 +0000
+++ b/macros.xml	Fri Nov 06 18:20:41 2020 +0000
@@ -20,6 +20,7 @@
   </xml>
   <xml name="stdio">
     <stdio>
+      <regex match="std::bad_alloc" level="fatal_oom" description="Could not allocate memory"/>
       <regex match="Could not allocate metaspace" level="fatal_oom" description="Java memory Exception"/>
       <regex match="Cannot create VM thread" level="fatal_oom" description="Java memory Exception"/>
       <regex match="qUncompress: could not allocate enough memory to uncompress data" level="fatal_oom" description="Java memory Exception"/>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_test_data_manual.sh	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,161 @@
+MSSimulator -test -in DecoyDatabase_1.fasta -out MSsimulator.mzml -algorithm:RandomNumberGenerators:biological reproducible -algorithm:RandomNumberGenerators:technical reproducible > MSSimulator_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MSSimulator_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+MSSimulator -test -in DecoyDatabase_1.fasta -out MSsimulator_MALDI.mzml -algorithm:RandomNumberGenerators:biological reproducible -algorithm:RandomNumberGenerators:technical reproducible -algorithm:MSSim:Global:ionization_type MALDI > MSSimulator_2.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MSSimulator_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+ClusterMassTracesByPrecursor -test -in_ms1 ConsensusMapNormalizer_input.consensusXML -in_swath ConsensusMapNormalizer_input.consensusXML -out ClusterMassTracesByPrecursor.mzml > ClusterMassTracesByPrecursor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'ClusterMassTracesByPrecursor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+ClusterMassTraces -test -in ConsensusMapNormalizer_input.consensusXML -out ClusterMassTraces.mzml > ClusterMassTraces.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'ClusterMassTraces failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+CVInspector -test -cv_files CHEMISTRY/XLMOD.obo -cv_names XLMOD -mapping_file MAPPING/ms-mapping.xml -html CVInspector.html > CVInspector.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'CVInspector failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+DeMeanderize -test -in MSsimulator_MALDI.mzml -out DeMeanderize.mzml > DeMeanderize.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'DeMeanderize failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# TODO DigestorMotif
+
+Digestor -test -in random.fa -out Digestor.fasta -out_type fasta > Digestor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'Digestor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+EICExtractor -test -in spectra.mzML -pos FileConverter_10_input.edta -out EICExtractor.csv > EICExtractor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'EICExtractor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+#TODO ERPairFinder
+
+FeatureFinderIsotopeWavelet -test -in FeatureFinderCentroided_1_input.mzML -out  FeatureFinderIsotopeWavelet.featureXML > FeatureFinderIsotopeWavelet.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'FeatureFinderIsotopeWavelet failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+
+FFEval -test -in  FeatureFinderCentroided_1_output.featureXML -truth  FeatureFinderCentroided_1_output.featureXML -out  FFEval.featureXML -out_roc FFEval_roc.csv  > FFEval.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'FFEval failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# TODO? deprecated IDDecoyProbability
+
+IDExtractor -test -in MSGFPlusAdapter_1_out.idXML -best_hits -number_of_peptides  1 -out  IDExtractor.idXML   > IDExtractor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'IDExtractor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+LabeledEval -test -in  FeatureLinkerLabeled_1_input.featureXML -truth  FeatureLinkerLabeled_1_output.consensusXML> LabeledEval.txt > LabeledEval.stdout 2> stderr # NOTE(review): stdout is redirected twice; under bash the last redirect wins, so LabeledEval.txt is only created empty - confirm which file should receive the output
+if [[ "$?" -ne "0" ]]; then >&2 echo 'LabeledEval failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+MapStatistics -test -in SiriusAdapter_3_input.featureXML -out MapStatistics.txt > MapStatistics_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MapStatistics_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+MapStatistics -test -in ConsensusXMLFile_1.consensusXML -out MapStatistics2.txt > MapStatistics_2.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MapStatistics_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+MetaboliteAdductDecharger -test -in Decharger_input.featureXML -out_cm MetaboliteAdductDecharger_cm.consensusXML -out_fm MetaboliteAdductDecharger_fm.featureXML -outpairs MetaboliteAdductDecharger_pairs.consensusXML > MetaboliteAdductDecharger.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MetaboliteAdductDecharger failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+MetaboliteSpectralMatcher -test -in spectra.mzML -database MetaboliteSpectralDB.mzML -out MetaboliteSpectralMatcher.mzTab > MetaboliteSpectralMatcher.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MetaboliteSpectralMatcher failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# TODO MRMPairFinder
+
+# generate two inputs for OpenSwathDIAPreScoring
+OpenSwathDIAPreScoring -tr OpenSwathWorkflow_1_input.TraML -swath_files OpenSwathAnalyzer_2_swathfile.mzML -output_files OpenSwathDIAPreScoring.tsv > OpenSwathDIAPreScoring.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathDIAPreScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# generate two inputs for OpenSwathDIAPreScoring by linking
+ln -s OpenSwathAnalyzer_2_swathfile.mzML OpenSwathDIAPreScoring_in1.mzML
+ln -s OpenSwathAnalyzer_2_swathfile.mzML OpenSwathDIAPreScoring_in2.mzML
+OpenSwathDIAPreScoring -tr OpenSwathWorkflow_1_input.TraML -swath_files OpenSwathDIAPreScoring_in1.mzML OpenSwathDIAPreScoring_in2.mzML -output_files OpenSwathDIAPreScoring_2_1.tsv OpenSwathDIAPreScoring_2_2.tsv > OpenSwathDIAPreScoring.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathDIAPreScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+OpenSwathRewriteToFeatureXML -featureXML OpenSwathFeatureXMLToTSV_input.featureXML -out OpenSwathRewriteToFeatureXML.featureXML > OpenSwathRewriteToFeatureXML.stdout 2> stderr
+# if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathRewriteToFeatureXML failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# adapted from the commented tests in OpenMS TODO may be removed later https://github.com/OpenMS/OpenMS/issues/4719
+FileConverter -in PepNovo.mzXML -out PepNovo_1.mzML > /dev/null 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'FileConverter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+PepNovoAdapter -ini PepNovoAdapter_1_parameters.ini -in PepNovo_1.mzML -out PepNovoAdapter_3_output.idXML -model_directory pepnovo_models/ -pepnovo_executable pepnovo > PepNovo_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PepNovoAdapter_3 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi  # status of the PepNovoAdapter run writing PepNovoAdapter_3_output.idXML
+
+FileConverter -in PepNovo.mzData -out PepNovo_4.mzML > /dev/null 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'FileConverter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+PepNovoAdapter -ini PepNovoAdapter_1_parameters.ini -in PepNovo_4.mzML -out PepNovoAdapter_4_output.idXML -model_directory pepnovo_models/ -pepnovo_executable pepnovo > PepNovo_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PepNovoAdapter_4 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi  # status of the PepNovoAdapter run writing PepNovoAdapter_4_output.idXML
+
+#PepNovoAdapter -ini PepNovoAdapter_5_parameters.ini -in PepNovoAdapter_5_output.pepnovo_out -out PepNovoAdapter_5_output.idXML -model_directory pepnovo_models/ 
+
+# TODO PhosphoScoring 
+PhosphoScoring -in spectra.mzML -id MSGFPlusAdapter_1_out1.tmp -out PhosphoScoring.idxml > PhosphoScoring.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PhosphoScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML XTandemAdapter_1_out.idXML -multiple_search_engines -skip_db_check -out PSMFeatureExtractor.idxml > PSMFeatureExtractor_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML XTandemAdapter_1_out.idXML -multiple_search_engines -skip_db_check -out PSMFeatureExtractor.mzid > PSMFeatureExtractor_2.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+QCCalculator -test -in OpenPepXL_input.mzML -out QCCalculator1.qcML > QCCalculator_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+QCCalculator -test -in OpenPepXL_input.mzML -id OpenPepXL_output.idXML -consensus OpenPepXL_input.consensusXML -out QCCalculator2.qcML > QCCalculator_2.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+QCCalculator -test -in IDMapper_4_input.mzML -id IDMapper_4_input.idXML -feature IDMapper_4_input.featureXML -out QCCalculator3.qcML > QCCalculator_3.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_3 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# TODO QCEmbedder
+# TODO QCExporter
+# TODO QCExtractor
+# TODO QCImporter
+
+QCMerger -test -in QCCalculator1.qcML QCCalculator3.qcML -out QCMerger.qcML > QCMerger.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCMerger failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+QCShrinker -test -in QCCalculator1.qcML -out QCShrinker.qcML > QCShrinker.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCShrinker failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+RNADigestor -test -in random_RNA.fa -out RNADigestor.fasta > RNADigestor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'RNADigestor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+RNPxlXICFilter -test -control FileFilter_1_input.mzML -treatment FileFilter_1_input.mzML -out RNPxlXICFilter.mzML > RNPxlXICFilter.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'RNPxlXICFilter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+RTEvaluation -in PeptideIndexer_1.idXML -out RTEvaluation.tsv > RTEvaluation.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'RTEvaluation failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SemanticValidator -test -in FileFilter_1_input.mzML -mapping_file MAPPING/ms-mapping.xml > SemanticValidator.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SemanticValidator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+IDFilter -in PeptideIndexer_1.idXML -best:strict -out SequenceCoverageCalculator_1.idXML > IDFilter.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'IDFilter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+SequenceCoverageCalculator -test -in_database  PeptideIndexer_1.fasta -in_peptides  SequenceCoverageCalculator_1.idXML  -out  SequenceCoverageCalculator.txt > SequenceCoverageCalculator.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SequenceCoverageCalculator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# TODO SpecLibCreator
+
+SpectraFilterBernNorm -test -in  SpectraFilterSqrtMower_1_input.mzML -out  SpectraFilterBernNorm.mzML > SpectraFilterBernNorm.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterBernNorm failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SpectraFilterMarkerMower -test -in  SpectraFilterSqrtMower_1_input.mzML -out  SpectraFilterMarkerMower.mzML > SpectraFilterMarkerMower.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterMarkerMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SpectraFilterNLargest -test -in  SpectraFilterSqrtMower_1_input.mzML -out  SpectraFilterNLargest.mzML > SpectraFilterNLargest.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterNLargest failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SpectraFilterNormalizer -test -in  SpectraFilterSqrtMower_1_input.mzML -out  SpectraFilterNormalizer.mzML > SpectraFilterNormalizer.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterNormalizer failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SpectraFilterParentPeakMower -test -in  SpectraFilterSqrtMower_1_input.mzML -out  SpectraFilterParentPeakMower.mzML > SpectraFilterParentPeakMower.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterParentPeakMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SpectraFilterScaler -test -in  SpectraFilterSqrtMower_1_input.mzML -out  SpectraFilterScaler.mzML > SpectraFilterScaler.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterScaler failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SpectraFilterThresholdMower -test -in  SpectraFilterSqrtMower_1_input.mzML -out  SpectraFilterThresholdMower.mzML > SpectraFilterThresholdMower.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterThresholdMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+SpectraMerger -test -in NovorAdapter_in.mzML -out SpectraMerger_1.mzML > SpectraMerger.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraMerger failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+# TODO SvmTheoreticalSpectrumGeneratorTrainer
+
+TransformationEvaluation -test -in FileInfo_16_input.trafoXML -out TransformationEvaluation.trafoXML > TransformationEvaluation.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'TransformationEvaluation failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
+
+XMLValidator -test -in FileFilter_1_input.mzML > XMLValidator.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'XMLValidator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/    /')"; fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.md	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,153 @@
+Galaxy wrapper for OpenMS
+=========================
+
+OpenMS is an open-source software C++ library for LC/MS data management and analyses.
+It offers an infrastructure for the rapid development of mass spectrometry related software.
+OpenMS is free software available under the three clause BSD license and runs under Windows, MacOSX and Linux.
+
+More information is available at:
+
+ * https://github.com/OpenMS/OpenMS
+ * https://www.openms.de/
+
+The wrappers for these tools and most of their tests are automatically
+generated using the `generate.sh` script. The generation of the tools is
+based on the CTDConverter (https://github.com/WorkflowConversion/CTDConverter)
+which can be fine tuned via the `hardcoded_params.json` file. This file allows
+to blacklist and hardcode parameters and to modify or set arbitrary
+CTD/XML attributes.
+
+Note that, due to its size, the test data is excluded from this repository. In
+order to generate the test data, call `test-data.sh`.
+
+Manual updates should only be done to
+
+- the `@GALAXY_VERSION@` token in `macros.xml`
+- and the manually contributed tests in `macros_test.xml` (The goal is that all
+  tools that do not have an automatically generated test are covered here)
+- the `hardcoded_params.json` files
+
+In a few cases patches may be acceptable.
+
+Installation
+============
+
+The Galaxy OpenMS tools can be installed from the toolshed. While most tools
+will work out of the box some need attention since requirements can not be
+fulfilled via Conda:
+
+Not yet in Conda are:
+
+- SpectraST (http://tools.proteomecenter.org/wiki/index.php?title=SpectraST)
+- MaRaCluster (https://github.com/statisticalbiotechnology/maracluster)
+
+Binaries for these tools can easily be obtained via: 
+
+```
+VERSION=....
+git clone -b release/$VERSION.0 https://github.com/OpenMS/OpenMS.git OpenMS$VERSION.0-git
+git -C OpenMS$VERSION.0-git submodule init
+git -C OpenMS$VERSION.0-git submodule update
+```
+
+They are located in `OpenMS$VERSION.0-git/THIRDPARTY/`.
+
+Not in Conda due to licencing restrictions:
+
+- Mascot http://www.matrixscience.com/
+- MSFragger https://github.com/Nesvilab/MSFragger
+- Novor http://www.rapidnovor.org/novor
+
+There are multiple ways to enable the Galaxy tools to use these binaries. 
+
+- Just copy them to the `bin` path within Galaxy's conda environment
+- Put them in any other path that is included in PATH
+- Edit the corresponding tools: In the command line part search for the parameters `-executable`, `-maracluster_executable`, or `-mascot_directory` and edit them appropriately.
+
+Working
+=======
+
+The tools work by:
+
+Preprocessing:
+
+- For each input / output data set parameter a directory is created (named by
+  the parameter)
+- For input data set parameters the links to the actual location of the data
+  sets are created
+
+Main:
+
+- The Galaxy wrapper creates two json config files: one containing the
+  parameters and the values chosen by the user and the other the values of
+  hardcoded parameters.
+- With `OpenMSTool -write_ctd ./` a CTD file (named `OpenMSTool.ctd`) is
+  generated that contains the default values.
+- A call to `fill_ctd.py` fills in the values from the json config files into
+  the CTD file
+- The actual tool is called `OpenMSTool -ini OpenMSTool.ctd` and also all input
+  and output parameters are given on the command line.
+
+Postprocessing:
+
+- output data sets are moved to the final locations
+
+Note: The reason for handling data sets on the command line (and not specifying
+them in the CTD file) is mainly that all files in Galaxy have the extension
+`.dat` and OpenMS tools require an appropriate extension. But this may change
+in the future.
+
+Generating OpenMS wrappers
+==========================
+
+1. remove old test data: `rm -rf $(ls -d test-data/* | egrep -v "random|\.loc")`
+2. `./generate.sh`
+
+What's happening:
+
+1. The binaries of the OpenMS package can generate a CTD file that describes
+   the parameters. These CTD files are converted to xml Galaxy tool descriptions
+   using the `CTDConverter`.
+
+2. The CI testing framework of OpenMS contains command lines and test data 
+   (https://github.com/OpenMS/OpenMS/tree/develop/src/tests/topp). These tests
+   are described in two CMake files.
+
+   - From these CMake files Galaxy tests are auto generated and stored in `macros_autotest.xml`
+   - The command lines are stored in `prepare_test_data.sh` for regeneration of test data
+
+More details can be found in the comments of the shell script.
+
+Open problems
+=============
+
+Some tools stall in CI testing using `--biocontainers` which is why the OpenMS
+tools are currently listed in `.tt_biocontainer_skip`. The affected tools are:
+
+- AssayGeneratorMetabo and SiriusAdapter (both depend on sirius)
+- OMSSAAdapter
+
+Using `docker -t` seems to solve the problem (see
+https://github.com/galaxyproject/galaxy/issues/10153).
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data.sh	Fri Nov 06 18:20:41 2020 +0000
@@ -0,0 +1,265 @@
+#!/usr/bin/env bash
+
+VERSION=2.6
+FILETYPES="filetypes.txt"
+CONDAPKG="https://anaconda.org/bioconda/openms/2.6.0/download/linux-64/openms-2.6.0-h4afb90d_0.tar.bz2"
+
+# import the magic
+. ./generate-foo.sh
+
+# install conda
+if [ -z "$tmp" ]; then
+	tmp=$(mktemp -d)
+	created="yes"
+fi
+
+export OPENMSGIT="$tmp/OpenMS$VERSION.0-git"
+export OPENMSPKG="$tmp/OpenMS$VERSION-pkg/"
+export OPENMSENV="$tmp/OpenMS$VERSION-env"
+export CTDCONVERTER="$tmp/CTDConverter"
+
+if [[ -z "$1" ]]; then
+	autotests="/dev/null"
+else
+	autotests="$1"
+fi
+
+# Bootstrap a private Miniconda below $tmp if no conda is available yet.
+if ! command -v conda > /dev/null 2>&1; then
+	# Download into $tmp under a fixed name: this keeps the installer out of
+	# the current directory and makes sure a re-run executes the fresh
+	# download (plain wget would save it as "....sh.1" and leave the stale
+	# file to be executed).
+	installer="$tmp/Miniconda3-latest-Linux-x86_64.sh"
+	wget -O "$installer" https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+	# The installer refuses to overwrite an existing prefix; that is fine
+	# when $tmp is reused, hence its exit status is deliberately not fatal.
+	bash "$installer" -b -p "$tmp/miniconda"
+	# without an activated conda nothing below can work
+	source "$tmp/miniconda/bin/activate" || exit 1
+fi
+# make `conda activate` / `conda deactivate` usable in this shell
+eval "$(conda shell.bash hook)"
+
+
+###############################################################################
+## get 
+## - conda environment (for executing the binaries) and 
+## - the git clone of OpenMS (for generating the tests)
+###############################################################################
+
+echo "Clone OpenMS $VERSION sources"
+# `git -C` runs the command inside the clone without changing the working
+# directory of this script. With the former unchecked `cd` a failed clone
+# made the submodule/pull commands operate on the current repository.
+if [[ ! -d "$OPENMSGIT" ]]; then
+	git clone -b "release/$VERSION.0" https://github.com/OpenMS/OpenMS.git "$OPENMSGIT"
+	git -C "$OPENMSGIT" submodule update --init
+else
+	git -C "$OPENMSGIT" pull origin "release/$VERSION.0"
+fi
+
+echo "Create OpenMS $VERSION conda env"
+# TODO currently add lxml (needed by CTDConverter)
+# TODO for some reason a too recent openjdk is used
+# Create the environment only if `conda env list` does not know its path yet.
+# -F: the path contains dots that have to match literally, not as a regex;
+# a matching line is still printed, as before.
+if ! conda env list | grep -F -- "$OPENMSENV"; then
+	conda create -y --quiet --override-channels --channel iuc --channel conda-forge --channel bioconda --channel defaults -p "$OPENMSENV" "openms=$VERSION" "openms-thirdparty=$VERSION" ctdopts=1.4 lxml
+# chmod -R u-w $OPENMSENV 
+fi
+###############################################################################
+## get the 
+## - conda package (for easy access and listing of the OpenMS binaries), 
+###############################################################################
+echo "Download OpenMS $VERSION package $CONDAPKG"
+
+if [[ ! -d "$OPENMSPKG" ]]; then
+	# OPENMSPKG ends with a slash, strip it to build a clean archive path
+	pkg_archive="${OPENMSPKG%/}/$(basename "$CONDAPKG")"
+	# On any failure remove the directory again: its mere existence makes
+	# later runs skip the download, which would leave an unusable package.
+	if ! { mkdir "$OPENMSPKG" && wget -q -P "$OPENMSPKG" "$CONDAPKG" && tar -xf "$pkg_archive" -C "$OPENMSPKG"; }; then
+		>&2 echo "could not download or unpack $CONDAPKG"
+		rm -rf "${OPENMSPKG:?}"
+		exit 1
+	fi
+	rm "$pkg_archive"
+fi
+
+###############################################################################
+## Get python libaries for CTD -> Galaxy conversion
+## TODO fix to main repo OR conda packkage if PRs are merged 
+###############################################################################
+echo "Clone CTDConverter"
+if [[ ! -d $CTDCONVERTER ]]; then
+	#git clone https://github.com/WorkflowConversion/CTDConverter.git CTDConverter
+	git clone -b topic/cdata https://github.com/bernt-matthias/CTDConverter.git $CTDCONVERTER
+else
+	cd $CTDCONVERTER
+	git pull origin topic/cdata
+	cd -
+fi
+
+###############################################################################
+## copy all the test data files to test-data
+## most of it (outputs) will be overwritten later, but it is needed for
+## prepare_test_data
+###############################################################################
+echo "Get test data"
+# wipe everything except the *fa and *loc files
+find test-data -type f,l,d ! -name "*fa"  ! -name "*loc" -delete
+
+# Copy the OpenMS TOPP test files (minus the CMake/check_ini files).
+# NUL-delimited so that file names containing whitespace survive, and xargs
+# splits the argument list instead of exceeding the system limit.
+find "$OPENMSGIT/src/tests/topp/" -type f -print0 \
+	| grep -zEv "third_party_tests.cmake|CMakeLists.txt|check_ini" \
+	| xargs -0 -r cp -t test-data/
+cp -r "$OPENMSGIT/share/OpenMS/MAPPING/" test-data/
+cp -r "$OPENMSGIT/share/OpenMS/CHEMISTRY" test-data/
+cp -r "$OPENMSGIT/share/OpenMS/examples/" test-data/
+if [[ ! -f test-data/MetaboliteSpectralDB.mzML ]]; then 
+	wget -nc https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Tutorials/Data/latest/Example_Data/Metabolomics/databases/MetaboliteSpectralDB.mzML
+	mv MetaboliteSpectralDB.mzML test-data/
+fi
+# the same files under the names with an extension
+ln -fs TOFCalibration_ref_masses test-data/TOFCalibration_ref_masses.txt
+ln -fs TOFCalibration_const test-data/TOFCalibration_const.csv
+
+if [ ! -d test-data/pepnovo_models/ ]; then
+	# private scratch directory instead of a fixed, guessable /tmp path
+	pepnovo_tmp=$(mktemp -d) || exit 1
+	wget -nc http://proteomics.ucsd.edu/Software/PepNovo/PepNovo.20120423.zip
+	unzip PepNovo.20120423.zip -d "$pepnovo_tmp"
+	mv "$pepnovo_tmp/Models" test-data/pepnovo_models/
+	rm PepNovo.20120423.zip
+	rm -rf "$pepnovo_tmp"
+fi
+###############################################################################
+## generate ctd files using the binaries in the conda package 
+###############################################################################
+echo "Create CTD files"
+conda activate "$OPENMSENV"
+rm -rf ctd
+mkdir -p ctd
+
+# TODO because of https://github.com/OpenMS/OpenMS/issues/4641
+# this needs to be done from within test-data
+# Abort if the directory cannot be entered: the relative ../ctd/ paths used
+# in the loop would otherwise point to the wrong place.
+cd test-data || exit
+for i in "$OPENMSPKG"/bin/*
+do
+	b=$(basename "$i")
+	echo "$b"
+	# the package directory only supplies the tool names; the tool itself
+	# is looked up via PATH
+	"$b" -write_ctd ../ctd/
+	# replace the first superscript two of a line by plain ASCII "^2"
+	sed -i -e 's/²/^2/' "../ctd/$b.ctd"
+done
+cd - || exit
+###############################################################################
+## fix ini files: OpenMS test data contains outdated ini files.
+## e.g. variables might be in different nodes, outdated variables present, new
+## variables missing, ...
+## OpenMS tools fix this on the fly (so it is no problem for the OpenMS tests)
+## but it is for the generation of the tests
+## see https://github.com/OpenMS/OpenMS/issues/4462
+###############################################################################
+echo "Update test INI files"
+for ini in test-data/*ini
+do
+	# the pattern stays literal if nothing matched
+	[[ -e "$ini" ]] || continue
+	# tool name = name attribute taken from the first 'NODE name="' line
+	tool=$(grep 'NODE name="' "$ini" | head -n 1 | sed 's/.*name="\([^"]\+\)".*/\1/')
+	# type -P: PATH lookup only (like which), but a bash builtin
+	bin=$(type -P "$tool")
+	if [[ -z $bin ]]; then
+		>&2 echo "missing binary to convert $ini"
+		continue
+	fi
+	cp "$ini" "$ini.backup"
+	# let the tool rewrite its own INI file in place; keep its output next
+	# to the INI file for inspection
+	if ! "$bin" -ini "$ini" -write_ini "$ini" > "$ini.stdout" 2> "$ini.stderr"; then
+		>&2 echo "could not convert $ini"
+	fi
+done
+
+###############################################################################
+## create script to create results for the tests and run it
+###############################################################################
+echo "Create test shell script"
+
+echo -n "" > prepare_test_data.sh
+echo 'export COMET_BINARY="comet"' >> prepare_test_data.sh
+echo 'export CRUX_BINARY="crux"' >> prepare_test_data.sh
+echo 'export FIDOCHOOSEPARAMS_BINARY="FidoChooseParameters"' >> prepare_test_data.sh
+echo 'export FIDO_BINARY="Fido"' >> prepare_test_data.sh
+echo 'export LUCIPHOR_BINARY="$(dirname $(realpath $(which luciphor2)))/luciphor2.jar"' >> prepare_test_data.sh
+
+echo 'export MARACLUSTER_BINARY="'"$OPENMSGIT"'/THIRDPARTY/Linux/64bit/MaRaCluster/maracluster"'>> prepare_test_data.sh
+echo 'export MSFRAGGER_BINARY="/home/berntm/Downloads/MSFragger-20171106/MSFragger-20171106.jar"'>> prepare_test_data.sh
+echo 'export MSGFPLUS_BINARY="$(msgf_plus -get_jar_path)"' >> prepare_test_data.sh
+echo 'export MYRIMATCH_BINARY="myrimatch"'>> prepare_test_data.sh
+echo 'export NOVOR_BINARY="/home/berntm/Downloads/novor/lib/novor.jar"' >> prepare_test_data.sh
+echo 'export OMSSA_BINARY="$(dirname $(realpath $(which omssacl)))/omssacl"'>> prepare_test_data.sh
+echo 'export PERCOLATOR_BINARY="percolator"'>> prepare_test_data.sh
+echo 'export SIRIUS_BINARY="$(which sirius)"' >> prepare_test_data.sh
+echo 'export SPECTRAST_BINARY="'"$OPENMSGIT"'/THIRDPARTY/Linux/64bit/SpectraST/spectrast"' >> prepare_test_data.sh
+echo 'export XTANDEM_BINARY="xtandem"' >> prepare_test_data.sh
+echo 'export THERMORAWFILEPARSER_BINARY="ThermoRawFileParser.exe"' >> prepare_test_data.sh
+
+prepare_test_data >> prepare_test_data.sh #tmp_test_data.sh
+
+# prepare_test_data > tmp_test_data.sh
+# # remove calls not needed for the tools listed in any .list file
+# echo LIST $LIST
+# if [ ! -z "$LIST" ]; then
+# 	REX=$(echo $LIST | sed 's/ /\n/g' | sed 's@.*/\([^/]\+\).xml$@\1@' | tr '\n' '|' | sed 's/|$//')
+# else
+# 	REX=".*"
+# fi
+# echo REX $REX
+# cat tmp_test_data.sh | egrep "($REX)" >> prepare_test_data.sh
+# rm tmp_test_data.sh
+
+# Make the script $1 (located in the current directory) executable and run it
+# with test-data/ as working directory. The whole run is aborted if test-data/
+# cannot be entered or left again.
+run_in_test_data() {
+	chmod u+x "$1"
+	cd ./test-data || exit
+	"../$1"
+	cd - || exit
+}
+
+echo "Execute test shell script"
+run_in_test_data prepare_test_data.sh
+
+
+###############################################################################
+## create/update test data for the manually generated tests
+## - run convert once with the manual tests only and 
+## - update test-data (needs to run 2x)
+###############################################################################
+echo "Execute test shell script for manually curated tests"
+run_in_test_data prepare_test_data_manual.sh
+
+
+###############################################################################
+## auto generate tests
+###############################################################################
+echo "Write test macros to $autotests"
+echo "<macros>" > "$autotests"
+# Iterate over the XML files via the glob itself instead of parsing the
+# output of `ls`, which breaks on unusual file names.
+for i in *xml
+do
+	# the pattern stays literal if nothing matched
+	[[ -e "$i" ]] || continue
+	# skip every file with "macros" in its name (formerly `grep -v macros`)
+	[[ "$i" == *macros* ]] && continue
+	b=$(basename "$i" .xml)
+	get_tests2 "$b" >> "$autotests"
+done
+echo "</macros>" >> "$autotests"
+
+echo "Create test data links"
+link_tmp_files
+
+# tests for tools using output_prefix parameters can not be auto generated
+# hence we output the tests for manual curation in macros_test.xml
+# and remove them from the autotests
+# -> OpenSwathFileSplitter IDRipper MzMLSplitter
+#
+# Furthermore we remove tests for tools without binaries in conda
+# -> MSFragger MaRaClusterAdapter NovorAdapter 
+#
+# not able to specify composite test data  
+# -> SpectraSTSearchAdapter 
+if [[ ! -z "$1" ]]; then
+	echo "" > macros_discarded_auto.xml
+	for i in OpenSwathFileSplitter IDRipper MzMLSplitter MSFraggerAdapter MaRaClusterAdapter NovorAdapter SpectraSTSearchAdapter
+	do
+		echo "<xml name=\"manutest_$i\">" >>  macros_discarded_auto.xml
+		xmlstarlet sel -t -c "/macros/xml[@name='autotest_$i']/test" macros_autotest.xml >>  macros_discarded_auto.xml
+		echo "</xml>"  >>  macros_discarded_auto.xml
+		xmlstarlet ed -d "/macros/xml[@name='autotest_$i']/test" macros_autotest.xml > tmp
+		mv tmp macros_autotest.xml
+	done
+	>&2 echo "discarded autogenerated macros for curation in macros_discarded_auto.xml"
+fi
+conda deactivate
+
+## remove broken symlinks in test-data
+find test-data/ -xtype l -delete
+
+# if [ ! -z "$created" ]; then
+# 	echo "Removing temporary directory"
+# 	rm -rf "$tmp"
+# fi