diff 2d_feature_extraction.py @ 5:5530132d500e draft

planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/2d_feature_extraction/ commit bb2d58ed37d8eb09583b86e3cdd9f5d1b56c42a0
author imgteam
date Sun, 04 Jan 2026 20:56:17 +0000
parents a4bc9dfde846
children 048545339ced
--- a/2d_feature_extraction.py	Wed Dec 17 11:20:49 2025 +0000
+++ b/2d_feature_extraction.py	Sun Jan 04 20:56:17 2026 +0000
@@ -1,127 +1,115 @@
-import argparse
-
-import giatools.io
+import giatools
 import numpy as np
 import pandas as pd
-import skimage.feature
+import scipy.ndimage as ndi
 import skimage.measure
-import skimage.morphology
-import skimage.segmentation
+
+# Fail early if an optional backend is not available
+giatools.require_backend('omezarr')
+
+
+def surface(labels: np.ndarray, label: int) -> int:
+    """
+    Ad-hoc computation of the "perimeter" of an object in 3-D, that is, the size of its surface in voxels.
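+
+    For example, for a solid 3x3x3 cube of a single label, only the center voxel
+    is interior, so all of the remaining 26 voxels count towards the surface.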
+    """
+    assert labels.ndim == 3  # sanity check
+
+    # Create a 3-D structuring element with 6-connectivity (the center voxel plus its 6 face neighbors)
+    selem = np.zeros((3, 3, 3), bool)
+    for ijk in np.ndindex(*selem.shape):
+        if (np.array(ijk) == 1).sum() >= 2:
+            selem[ijk] = True  # `ijk` is already a tuple, so it can index the array directly
+    assert selem.sum() == 7  # sanity check
+
+    # Compute the area of the surface
+    cc = (labels == label)
+    cc_interior = ndi.binary_erosion(cc, selem)
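+    # The XOR keeps exactly the voxels removed by the erosion, i.e. the one-voxel-thick outer shell of the object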
+    surface = np.logical_xor(cc, cc_interior)
+    return surface.sum()  # number of voxels on the surface of the object
+
+
+def compute_if_dask(obj):
+    """
+    Return the computed value if the object is a Dask array or another deferred Dask computation, and the object itself otherwise.
+    """
+    return obj.compute() if hasattr(obj, 'compute') else obj
 
 
 if __name__ == '__main__':
+    tool = giatools.ToolBaseplate()
+    tool.add_input_image('labels')
+    tool.add_input_image('intensities', required=False)
+    tool.parser.add_argument('--output', type=str)
+    tool.parse_args()
 
-    parser = argparse.ArgumentParser(description='Extract image features')
+    # Validate the input image
+    try:
+        label_image = tool.args.input_images['labels']
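+        # Reject images with non-singleton axes other than Z, Y, and X (e.g., multiple channels or frames)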
+        if any(label_image.shape[label_image.axes.index(axis)] > 1 for axis in label_image.axes if axis not in 'ZYX'):
+            raise ValueError(f'This tool is not applicable to images with {label_image.original_axes} axes.')
+
+        # Extract the image features
+        for section in tool.run('ZYX'):  # the validation code above guarantees that we will have only a single iteration
+            df = pd.DataFrame()
 
-    # TODO create factory for boilerplate code
-    features = parser.add_argument_group('compute features')
-    features.add_argument('--all', dest='all_features', action='store_true')
-    features.add_argument('--label', dest='add_label', action='store_true')
-    features.add_argument('--patches', dest='add_roi_patches', action='store_true')
-    features.add_argument('--max_intensity', dest='max_intensity', action='store_true')
-    features.add_argument('--mean_intensity', dest='mean_intensity', action='store_true')
-    features.add_argument('--min_intensity', dest='min_intensity', action='store_true')
-    features.add_argument('--moments_hu', dest='moments_hu', action='store_true')
-    features.add_argument('--centroid', dest='centroid', action='store_true')
-    features.add_argument('--bbox', dest='bbox', action='store_true')
-    features.add_argument('--area', dest='area', action='store_true')
-    features.add_argument('--filled_area', dest='filled_area', action='store_true')
-    features.add_argument('--convex_area', dest='convex_area', action='store_true')
-    features.add_argument('--perimeter', dest='perimeter', action='store_true')
-    features.add_argument('--extent', dest='extent', action='store_true')
-    features.add_argument('--eccentricity', dest='eccentricity', action='store_true')
-    features.add_argument('--equivalent_diameter', dest='equivalent_diameter', action='store_true')
-    features.add_argument('--euler_number', dest='euler_number', action='store_true')
-    features.add_argument('--inertia_tensor_eigvals', dest='inertia_tensor_eigvals', action='store_true')
-    features.add_argument('--major_axis_length', dest='major_axis_length', action='store_true')
-    features.add_argument('--minor_axis_length', dest='minor_axis_length', action='store_true')
-    features.add_argument('--orientation', dest='orientation', action='store_true')
-    features.add_argument('--solidity', dest='solidity', action='store_true')
-    features.add_argument('--moments', dest='moments', action='store_true')
-    features.add_argument('--convexity', dest='convexity', action='store_true')
+            # Get the labels array and cast to `uint8` if it is `bool` (`skimage.measure.regionprops` refuses `bool` typed arrays)
+            labels_section_data = section['labels'].data.squeeze()
+            if np.issubdtype(labels_section_data.dtype, bool):
+                print('Converting labels from bool to uint8')
+                labels_section_data = labels_section_data.astype(np.uint8)
 
-    parser.add_argument('--label_file_binary', dest='label_file_binary', action='store_true')
-
-    parser.add_argument('--raw', dest='raw_file', type=argparse.FileType('r'),
-                        help='Original input file', required=False)
-    parser.add_argument('label_file', type=argparse.FileType('r'),
-                        help='Label input file')
-    parser.add_argument('output_file', type=argparse.FileType('w'),
-                        help='Tabular output file')
-    args = parser.parse_args()
-
-    label_file_binary = args.label_file_binary
-    label_file = args.label_file.name
-    out_file = args.output_file.name
-    add_patch = args.add_roi_patches
-
-    raw_image = None
-    if args.raw_file is not None:
-        raw_image = giatools.io.imread(args.raw_file.name)
-
-    raw_label_image = giatools.io.imread(label_file)
-
-    df = pd.DataFrame()
-    if label_file_binary:
-        raw_label_image = skimage.measure.label(raw_label_image)
-    regions = skimage.measure.regionprops(raw_label_image, intensity_image=raw_image)
-
-    df['it'] = np.arange(len(regions))
+            # Some features currently cannot be computed from Dask arrays
+            if any(
+                feature_name in tool.args.params['features'] for feature_name in (
+                    'inertia_tensor_eigvals',
+                    'axis_major_length',
+                    'axis_minor_length',
+                    'eccentricity',
+                    'orientation',
+                    'moments_hu',
+                )
+            ):
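+                # Materialize the section in memory so that these features can be computed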
+                labels_section_data = compute_if_dask(labels_section_data)
 
-    if add_patch:
-        df['image'] = df['it'].map(lambda ait: regions[ait].image.astype(np.float).tolist())
-        df['intensity_image'] = df['it'].map(lambda ait: regions[ait].intensity_image.astype(np.float).tolist())
-
-    # TODO no matrix features, but split in own rows?
-    if args.add_label or args.all_features:
-        df['label'] = df['it'].map(lambda ait: regions[ait].label)
+            # Compute the image features
+            if 'intensities' in tool.args.input_images:
+                regions = skimage.measure.regionprops(labels_section_data, intensity_image=section['intensities'].data.squeeze())
+            else:
+                regions = skimage.measure.regionprops(labels_section_data, intensity_image=None)
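+            # Temporary column of region indices, removed again before the output is written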
+            df['it'] = np.arange(len(regions))
+            for feature_name in tool.args.params['features']:
 
-    if raw_image is not None:
-        if args.max_intensity or args.all_features:
-            df['max_intensity'] = df['it'].map(lambda ait: regions[ait].max_intensity)
-        if args.mean_intensity or args.all_features:
-            df['mean_intensity'] = df['it'].map(lambda ait: regions[ait].mean_intensity)
-        if args.min_intensity or args.all_features:
-            df['min_intensity'] = df['it'].map(lambda ait: regions[ait].min_intensity)
-        if args.moments_hu or args.all_features:
-            df['moments_hu'] = df['it'].map(lambda ait: regions[ait].moments_hu)
+                # Add the object label
+                if feature_name == 'label':
+                    df['label'] = df['it'].map(lambda ait: regions[ait].label)
+
+                # Add the object perimeter/surface
+                elif feature_name == 'perimeter' and labels_section_data.ndim == 3:
+                    df['perimeter'] = df['it'].map(
+                        lambda ait: surface(labels_section_data, regions[ait].label),  # `skimage.measure.regionprops` cannot compute perimeters for 3-D data
+                    )
 
-    if args.centroid or args.all_features:
-        df['centroid'] = df['it'].map(lambda ait: regions[ait].centroid)
-    if args.bbox or args.all_features:
-        df['bbox'] = df['it'].map(lambda ait: regions[ait].bbox)
-    if args.area or args.all_features:
-        df['area'] = df['it'].map(lambda ait: regions[ait].area)
-    if args.filled_area or args.all_features:
-        df['filled_area'] = df['it'].map(lambda ait: regions[ait].filled_area)
-    if args.convex_area or args.all_features:
-        df['convex_area'] = df['it'].map(lambda ait: regions[ait].convex_area)
-    if args.perimeter or args.all_features:
-        df['perimeter'] = df['it'].map(lambda ait: regions[ait].perimeter)
-    if args.extent or args.all_features:
-        df['extent'] = df['it'].map(lambda ait: regions[ait].extent)
-    if args.eccentricity or args.all_features:
-        df['eccentricity'] = df['it'].map(lambda ait: regions[ait].eccentricity)
-    if args.equivalent_diameter or args.all_features:
-        df['equivalent_diameter'] = df['it'].map(lambda ait: regions[ait].equivalent_diameter)
-    if args.euler_number or args.all_features:
-        df['euler_number'] = df['it'].map(lambda ait: regions[ait].euler_number)
-    if args.inertia_tensor_eigvals or args.all_features:
-        df['inertia_tensor_eigvals'] = df['it'].map(lambda ait: regions[ait].inertia_tensor_eigvals)
-    if args.major_axis_length or args.all_features:
-        df['major_axis_length'] = df['it'].map(lambda ait: regions[ait].major_axis_length)
-    if args.minor_axis_length or args.all_features:
-        df['minor_axis_length'] = df['it'].map(lambda ait: regions[ait].minor_axis_length)
-    if args.orientation or args.all_features:
-        df['orientation'] = df['it'].map(lambda ait: regions[ait].orientation)
-    if args.solidity or args.all_features:
-        df['solidity'] = df['it'].map(lambda ait: regions[ait].solidity)
-    if args.moments or args.all_features:
-        df['moments'] = df['it'].map(lambda ait: regions[ait].moments)
-    if args.convexity or args.all_features:
-        perimeter = df['it'].map(lambda ait: regions[ait].perimeter)
-        area = df['it'].map(lambda ait: regions[ait].area)
-        df['convexity'] = area / (perimeter * perimeter)
+                # Skip features that are not available when processing 3-D images
+                elif feature_name in ('eccentricity', 'moments_hu', 'orientation') and labels_section_data.ndim == 3:
+                    print(f'Skipping feature that is not available for 3-D images: "{feature_name}"')
+
+                # Add another feature from `regions` that was computed via `skimage.measure.regionprops`
+                else:
+                    try:
+                        df[feature_name] = df['it'].map(lambda ait: getattr(regions[ait], feature_name))
+                    except (AttributeError, TypeError):  # `getattr` raises AttributeError for unknown feature names
+                        raise ValueError(f'Unknown feature: "{feature_name}"')
 
-    del df['it']
-    df.to_csv(out_file, sep='\t', lineterminator='\n', index=False)
+            # Resolve any remaining Dask objects to the actual values (e.g., when processing Zarrs)
+            df = df.map(compute_if_dask)
+
+            # Convert lists/tuples/arrays to lists of plain Python numbers (e.g., float instead of np.float64)
+            df = df.map(
+                lambda obj: np.asarray(obj).tolist() if type(obj) in (list, tuple, np.ndarray) else obj,
+            )
+
+            del df['it']
+            df.to_csv(tool.args.raw_args.output, sep='\t', lineterminator='\n', index=False)
+
+    except ValueError as err:
+        raise SystemExit(err.args[0])